1 | //===--- SemaCUDA.cpp - Semantic Analysis for CUDA constructs -------------===// |
---|---|
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// \file |
9 | /// This file implements semantic analysis for CUDA constructs. |
10 | /// |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "clang/Sema/SemaCUDA.h" |
14 | #include "clang/AST/ASTContext.h" |
15 | #include "clang/AST/Decl.h" |
16 | #include "clang/AST/ExprCXX.h" |
17 | #include "clang/Basic/Cuda.h" |
18 | #include "clang/Basic/TargetInfo.h" |
19 | #include "clang/Lex/Preprocessor.h" |
20 | #include "clang/Sema/Lookup.h" |
21 | #include "clang/Sema/Overload.h" |
22 | #include "clang/Sema/ScopeInfo.h" |
23 | #include "clang/Sema/Sema.h" |
24 | #include "clang/Sema/Template.h" |
25 | #include "llvm/ADT/SmallVector.h" |
26 | #include <optional> |
27 | using namespace clang; |
28 | |
29 | SemaCUDA::SemaCUDA(Sema &S) : SemaBase(S) {} |
30 | |
31 | template <typename AttrT> static bool hasExplicitAttr(const VarDecl *D) { |
32 | if (!D) |
33 | return false; |
34 | if (auto *A = D->getAttr<AttrT>()) |
35 | return !A->isImplicit(); |
36 | return false; |
37 | } |
38 | |
39 | void SemaCUDA::PushForceHostDevice() { |
40 | assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); |
41 | ForceHostDeviceDepth++; |
42 | } |
43 | |
44 | bool SemaCUDA::PopForceHostDevice() { |
45 | assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); |
46 | if (ForceHostDeviceDepth == 0) |
47 | return false; |
48 | ForceHostDeviceDepth--; |
49 | return true; |
50 | } |
51 | |
52 | ExprResult SemaCUDA::ActOnExecConfigExpr(Scope *S, SourceLocation LLLLoc, |
53 | MultiExprArg ExecConfig, |
54 | SourceLocation GGGLoc) { |
55 | FunctionDecl *ConfigDecl = getASTContext().getcudaConfigureCallDecl(); |
56 | if (!ConfigDecl) |
57 | return ExprError(Diag(LLLLoc, diag::err_undeclared_var_use) |
58 | << getConfigureFuncName()); |
59 | QualType ConfigQTy = ConfigDecl->getType(); |
60 | |
61 | DeclRefExpr *ConfigDR = new (getASTContext()) DeclRefExpr( |
62 | getASTContext(), ConfigDecl, false, ConfigQTy, VK_LValue, LLLLoc); |
63 | SemaRef.MarkFunctionReferenced(Loc: LLLLoc, Func: ConfigDecl); |
64 | |
65 | return SemaRef.BuildCallExpr(S, ConfigDR, LLLLoc, ExecConfig, GGGLoc, nullptr, |
66 | /*IsExecConfig=*/true); |
67 | } |
68 | |
69 | CUDAFunctionTarget SemaCUDA::IdentifyTarget(const ParsedAttributesView &Attrs) { |
70 | bool HasHostAttr = false; |
71 | bool HasDeviceAttr = false; |
72 | bool HasGlobalAttr = false; |
73 | bool HasInvalidTargetAttr = false; |
74 | for (const ParsedAttr &AL : Attrs) { |
75 | switch (AL.getKind()) { |
76 | case ParsedAttr::AT_CUDAGlobal: |
77 | HasGlobalAttr = true; |
78 | break; |
79 | case ParsedAttr::AT_CUDAHost: |
80 | HasHostAttr = true; |
81 | break; |
82 | case ParsedAttr::AT_CUDADevice: |
83 | HasDeviceAttr = true; |
84 | break; |
85 | case ParsedAttr::AT_CUDAInvalidTarget: |
86 | HasInvalidTargetAttr = true; |
87 | break; |
88 | default: |
89 | break; |
90 | } |
91 | } |
92 | |
93 | if (HasInvalidTargetAttr) |
94 | return CUDAFunctionTarget::InvalidTarget; |
95 | |
96 | if (HasGlobalAttr) |
97 | return CUDAFunctionTarget::Global; |
98 | |
99 | if (HasHostAttr && HasDeviceAttr) |
100 | return CUDAFunctionTarget::HostDevice; |
101 | |
102 | if (HasDeviceAttr) |
103 | return CUDAFunctionTarget::Device; |
104 | |
105 | return CUDAFunctionTarget::Host; |
106 | } |
107 | |
108 | template <typename A> |
109 | static bool hasAttr(const Decl *D, bool IgnoreImplicitAttr) { |
110 | return D->hasAttrs() && llvm::any_of(D->getAttrs(), [&](Attr *Attribute) { |
111 | return isa<A>(Attribute) && |
112 | !(IgnoreImplicitAttr && Attribute->isImplicit()); |
113 | }); |
114 | } |
115 | |
116 | SemaCUDA::CUDATargetContextRAII::CUDATargetContextRAII( |
117 | SemaCUDA &S_, SemaCUDA::CUDATargetContextKind K, Decl *D) |
118 | : S(S_) { |
119 | SavedCtx = S.CurCUDATargetCtx; |
120 | assert(K == SemaCUDA::CTCK_InitGlobalVar); |
121 | auto *VD = dyn_cast_or_null<VarDecl>(Val: D); |
122 | if (VD && VD->hasGlobalStorage() && !VD->isStaticLocal()) { |
123 | auto Target = CUDAFunctionTarget::Host; |
124 | if ((hasAttr<CUDADeviceAttr>(VD, /*IgnoreImplicit=*/true) && |
125 | !hasAttr<CUDAHostAttr>(VD, /*IgnoreImplicit=*/true)) || |
126 | hasAttr<CUDASharedAttr>(VD, /*IgnoreImplicit=*/true) || |
127 | hasAttr<CUDAConstantAttr>(VD, /*IgnoreImplicit=*/true)) |
128 | Target = CUDAFunctionTarget::Device; |
129 | S.CurCUDATargetCtx = {Target, K, VD}; |
130 | } |
131 | } |
132 | |
133 | /// IdentifyTarget - Determine the CUDA compilation target for this function |
134 | CUDAFunctionTarget SemaCUDA::IdentifyTarget(const FunctionDecl *D, |
135 | bool IgnoreImplicitHDAttr) { |
136 | // Code that lives outside a function gets the target from CurCUDATargetCtx. |
137 | if (D == nullptr) |
138 | return CurCUDATargetCtx.Target; |
139 | |
140 | if (D->hasAttr<CUDAInvalidTargetAttr>()) |
141 | return CUDAFunctionTarget::InvalidTarget; |
142 | |
143 | if (D->hasAttr<CUDAGlobalAttr>()) |
144 | return CUDAFunctionTarget::Global; |
145 | |
146 | if (hasAttr<CUDADeviceAttr>(D, IgnoreImplicitHDAttr)) { |
147 | if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr)) |
148 | return CUDAFunctionTarget::HostDevice; |
149 | return CUDAFunctionTarget::Device; |
150 | } else if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr)) { |
151 | return CUDAFunctionTarget::Host; |
152 | } else if ((D->isImplicit() || !D->isUserProvided()) && |
153 | !IgnoreImplicitHDAttr) { |
154 | // Some implicit declarations (like intrinsic functions) are not marked. |
155 | // Set the most lenient target on them for maximal flexibility. |
156 | return CUDAFunctionTarget::HostDevice; |
157 | } |
158 | |
159 | return CUDAFunctionTarget::Host; |
160 | } |
161 | |
/// IdentifyTarget - Determine the CUDA compilation target for this variable.
SemaCUDA::CUDAVariableTarget SemaCUDA::IdentifyTarget(const VarDecl *Var) {
  // HIP managed variables are accessible from both sides via a unified
  // address.
  if (Var->hasAttr<HIPManagedAttr>())
    return CVT_Unified;
  // Only constexpr and const variables with implicit constant attribute
  // are emitted on both sides. Such variables are promoted to device side
  // only if they have static constant initializers on device side.
  if ((Var->isConstexpr() || Var->getType().isConstQualified()) &&
      Var->hasAttr<CUDAConstantAttr>() &&
      !hasExplicitAttr<CUDAConstantAttr>(Var))
    return CVT_Both;
  // Explicit __device__/__constant__/__shared__ variables, and CUDA builtin
  // surface/texture types, live on the device only.
  if (Var->hasAttr<CUDADeviceAttr>() || Var->hasAttr<CUDAConstantAttr>() ||
      Var->hasAttr<CUDASharedAttr>() ||
      Var->getType()->isCUDADeviceBuiltinSurfaceType() ||
      Var->getType()->isCUDADeviceBuiltinTextureType())
    return CVT_Device;
  // Function-scope static variable without explicit device or constant
  // attribute are emitted
  // - on both sides in host device functions
  // - on device side in device or global functions
  if (auto *FD = dyn_cast<FunctionDecl>(Var->getDeclContext())) {
    switch (IdentifyTarget(FD)) {
    case CUDAFunctionTarget::HostDevice:
      return CVT_Both;
    case CUDAFunctionTarget::Device:
    case CUDAFunctionTarget::Global:
      return CVT_Device;
    default:
      return CVT_Host;
    }
  }
  // Namespace-scope variable with no CUDA attributes: host only.
  return CVT_Host;
}
195 | |
196 | // * CUDA Call preference table |
197 | // |
198 | // F - from, |
199 | // T - to |
200 | // Ph - preference in host mode |
201 | // Pd - preference in device mode |
202 | // H - handled in (x) |
203 | // Preferences: N:native, SS:same side, HD:host-device, WS:wrong side, --:never. |
204 | // |
205 | // | F | T | Ph | Pd | H | |
206 | // |----+----+-----+-----+-----+ |
207 | // | d | d | N | N | (c) | |
208 | // | d | g | -- | -- | (a) | |
209 | // | d | h | -- | -- | (e) | |
210 | // | d | hd | HD | HD | (b) | |
211 | // | g | d | N | N | (c) | |
212 | // | g | g | -- | -- | (a) | |
213 | // | g | h | -- | -- | (e) | |
214 | // | g | hd | HD | HD | (b) | |
215 | // | h | d | -- | -- | (e) | |
216 | // | h | g | N | N | (c) | |
217 | // | h | h | N | N | (c) | |
218 | // | h | hd | HD | HD | (b) | |
219 | // | hd | d | WS | SS | (d) | |
220 | // | hd | g | SS | -- |(d/a)| |
221 | // | hd | h | SS | WS | (d) | |
222 | // | hd | hd | HD | HD | (b) | |
223 | |
224 | SemaCUDA::CUDAFunctionPreference |
225 | SemaCUDA::IdentifyPreference(const FunctionDecl *Caller, |
226 | const FunctionDecl *Callee) { |
227 | assert(Callee && "Callee must be valid."); |
228 | |
229 | // Treat ctor/dtor as host device function in device var initializer to allow |
230 | // trivial ctor/dtor without device attr to be used. Non-trivial ctor/dtor |
231 | // will be diagnosed by checkAllowedInitializer. |
232 | if (Caller == nullptr && CurCUDATargetCtx.Kind == CTCK_InitGlobalVar && |
233 | CurCUDATargetCtx.Target == CUDAFunctionTarget::Device && |
234 | (isa<CXXConstructorDecl>(Val: Callee) || isa<CXXDestructorDecl>(Val: Callee))) |
235 | return CFP_HostDevice; |
236 | |
237 | CUDAFunctionTarget CallerTarget = IdentifyTarget(D: Caller); |
238 | CUDAFunctionTarget CalleeTarget = IdentifyTarget(D: Callee); |
239 | |
240 | // If one of the targets is invalid, the check always fails, no matter what |
241 | // the other target is. |
242 | if (CallerTarget == CUDAFunctionTarget::InvalidTarget || |
243 | CalleeTarget == CUDAFunctionTarget::InvalidTarget) |
244 | return CFP_Never; |
245 | |
246 | // (a) Can't call global from some contexts until we support CUDA's |
247 | // dynamic parallelism. |
248 | if (CalleeTarget == CUDAFunctionTarget::Global && |
249 | (CallerTarget == CUDAFunctionTarget::Global || |
250 | CallerTarget == CUDAFunctionTarget::Device)) |
251 | return CFP_Never; |
252 | |
253 | // (b) Calling HostDevice is OK for everyone. |
254 | if (CalleeTarget == CUDAFunctionTarget::HostDevice) |
255 | return CFP_HostDevice; |
256 | |
257 | // (c) Best case scenarios |
258 | if (CalleeTarget == CallerTarget || |
259 | (CallerTarget == CUDAFunctionTarget::Host && |
260 | CalleeTarget == CUDAFunctionTarget::Global) || |
261 | (CallerTarget == CUDAFunctionTarget::Global && |
262 | CalleeTarget == CUDAFunctionTarget::Device)) |
263 | return CFP_Native; |
264 | |
265 | // HipStdPar mode is special, in that assessing whether a device side call to |
266 | // a host target is deferred to a subsequent pass, and cannot unambiguously be |
267 | // adjudicated in the AST, hence we optimistically allow them to pass here. |
268 | if (getLangOpts().HIPStdPar && |
269 | (CallerTarget == CUDAFunctionTarget::Global || |
270 | CallerTarget == CUDAFunctionTarget::Device || |
271 | CallerTarget == CUDAFunctionTarget::HostDevice) && |
272 | CalleeTarget == CUDAFunctionTarget::Host) |
273 | return CFP_HostDevice; |
274 | |
275 | // (d) HostDevice behavior depends on compilation mode. |
276 | if (CallerTarget == CUDAFunctionTarget::HostDevice) { |
277 | // It's OK to call a compilation-mode matching function from an HD one. |
278 | if ((getLangOpts().CUDAIsDevice && |
279 | CalleeTarget == CUDAFunctionTarget::Device) || |
280 | (!getLangOpts().CUDAIsDevice && |
281 | (CalleeTarget == CUDAFunctionTarget::Host || |
282 | CalleeTarget == CUDAFunctionTarget::Global))) |
283 | return CFP_SameSide; |
284 | |
285 | // Calls from HD to non-mode-matching functions (i.e., to host functions |
286 | // when compiling in device mode or to device functions when compiling in |
287 | // host mode) are allowed at the sema level, but eventually rejected if |
288 | // they're ever codegened. TODO: Reject said calls earlier. |
289 | return CFP_WrongSide; |
290 | } |
291 | |
292 | // (e) Calling across device/host boundary is not something you should do. |
293 | if ((CallerTarget == CUDAFunctionTarget::Host && |
294 | CalleeTarget == CUDAFunctionTarget::Device) || |
295 | (CallerTarget == CUDAFunctionTarget::Device && |
296 | CalleeTarget == CUDAFunctionTarget::Host) || |
297 | (CallerTarget == CUDAFunctionTarget::Global && |
298 | CalleeTarget == CUDAFunctionTarget::Host)) |
299 | return CFP_Never; |
300 | |
301 | llvm_unreachable("All cases should've been handled by now."); |
302 | } |
303 | |
304 | template <typename AttrT> static bool hasImplicitAttr(const FunctionDecl *D) { |
305 | if (!D) |
306 | return false; |
307 | if (auto *A = D->getAttr<AttrT>()) |
308 | return A->isImplicit(); |
309 | return D->isImplicit(); |
310 | } |
311 | |
312 | bool SemaCUDA::isImplicitHostDeviceFunction(const FunctionDecl *D) { |
313 | bool IsImplicitDevAttr = hasImplicitAttr<CUDADeviceAttr>(D); |
314 | bool IsImplicitHostAttr = hasImplicitAttr<CUDAHostAttr>(D); |
315 | return IsImplicitDevAttr && IsImplicitHostAttr; |
316 | } |
317 | |
318 | void SemaCUDA::EraseUnwantedMatches( |
319 | const FunctionDecl *Caller, |
320 | SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches) { |
321 | if (Matches.size() <= 1) |
322 | return; |
323 | |
324 | using Pair = std::pair<DeclAccessPair, FunctionDecl *>; |
325 | |
326 | // Gets the CUDA function preference for a call from Caller to Match. |
327 | auto GetCFP = [&](const Pair &Match) { |
328 | return IdentifyPreference(Caller, Callee: Match.second); |
329 | }; |
330 | |
331 | // Find the best call preference among the functions in Matches. |
332 | CUDAFunctionPreference BestCFP = |
333 | GetCFP(*llvm::max_element(Range&: Matches, C: [&](const Pair &M1, const Pair &M2) { |
334 | return GetCFP(M1) < GetCFP(M2); |
335 | })); |
336 | |
337 | // Erase all functions with lower priority. |
338 | llvm::erase_if(C&: Matches, |
339 | P: [&](const Pair &Match) { return GetCFP(Match) < BestCFP; }); |
340 | } |
341 | |
342 | /// When an implicitly-declared special member has to invoke more than one |
343 | /// base/field special member, conflicts may occur in the targets of these |
344 | /// members. For example, if one base's member __host__ and another's is |
345 | /// __device__, it's a conflict. |
346 | /// This function figures out if the given targets \param Target1 and |
347 | /// \param Target2 conflict, and if they do not it fills in |
348 | /// \param ResolvedTarget with a target that resolves for both calls. |
349 | /// \return true if there's a conflict, false otherwise. |
350 | static bool |
351 | resolveCalleeCUDATargetConflict(CUDAFunctionTarget Target1, |
352 | CUDAFunctionTarget Target2, |
353 | CUDAFunctionTarget *ResolvedTarget) { |
354 | // Only free functions and static member functions may be global. |
355 | assert(Target1 != CUDAFunctionTarget::Global); |
356 | assert(Target2 != CUDAFunctionTarget::Global); |
357 | |
358 | if (Target1 == CUDAFunctionTarget::HostDevice) { |
359 | *ResolvedTarget = Target2; |
360 | } else if (Target2 == CUDAFunctionTarget::HostDevice) { |
361 | *ResolvedTarget = Target1; |
362 | } else if (Target1 != Target2) { |
363 | return true; |
364 | } else { |
365 | *ResolvedTarget = Target1; |
366 | } |
367 | |
368 | return false; |
369 | } |
370 | |
371 | bool SemaCUDA::inferTargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl, |
372 | CXXSpecialMemberKind CSM, |
373 | CXXMethodDecl *MemberDecl, |
374 | bool ConstRHS, |
375 | bool Diagnose) { |
376 | // If MemberDecl is virtual destructor of an explicit template class |
377 | // instantiation, it must be emitted, therefore it needs to be inferred |
378 | // conservatively by ignoring implicit host/device attrs of member and parent |
379 | // dtors called by it. Also, it needs to be checed by deferred diag visitor. |
380 | bool IsExpVDtor = false; |
381 | if (isa<CXXDestructorDecl>(Val: MemberDecl) && MemberDecl->isVirtual()) { |
382 | if (auto *Spec = dyn_cast<ClassTemplateSpecializationDecl>(Val: ClassDecl)) { |
383 | TemplateSpecializationKind TSK = Spec->getTemplateSpecializationKind(); |
384 | IsExpVDtor = TSK == TSK_ExplicitInstantiationDeclaration || |
385 | TSK == TSK_ExplicitInstantiationDefinition; |
386 | } |
387 | } |
388 | if (IsExpVDtor) |
389 | SemaRef.DeclsToCheckForDeferredDiags.insert(MemberDecl); |
390 | |
391 | // If the defaulted special member is defined lexically outside of its |
392 | // owning class, or the special member already has explicit device or host |
393 | // attributes, do not infer. |
394 | bool InClass = MemberDecl->getLexicalParent() == MemberDecl->getParent(); |
395 | bool HasH = MemberDecl->hasAttr<CUDAHostAttr>(); |
396 | bool HasD = MemberDecl->hasAttr<CUDADeviceAttr>(); |
397 | bool HasExplicitAttr = |
398 | (HasD && !MemberDecl->getAttr<CUDADeviceAttr>()->isImplicit()) || |
399 | (HasH && !MemberDecl->getAttr<CUDAHostAttr>()->isImplicit()); |
400 | if (!InClass || HasExplicitAttr) |
401 | return false; |
402 | |
403 | std::optional<CUDAFunctionTarget> InferredTarget; |
404 | |
405 | // We're going to invoke special member lookup; mark that these special |
406 | // members are called from this one, and not from its caller. |
407 | Sema::ContextRAII MethodContext(SemaRef, MemberDecl); |
408 | |
409 | // Look for special members in base classes that should be invoked from here. |
410 | // Infer the target of this member base on the ones it should call. |
411 | // Skip direct and indirect virtual bases for abstract classes. |
412 | llvm::SmallVector<const CXXBaseSpecifier *, 16> Bases; |
413 | for (const auto &B : ClassDecl->bases()) { |
414 | if (!B.isVirtual()) { |
415 | Bases.push_back(Elt: &B); |
416 | } |
417 | } |
418 | |
419 | if (!ClassDecl->isAbstract()) { |
420 | llvm::append_range(C&: Bases, R: llvm::make_pointer_range(Range: ClassDecl->vbases())); |
421 | } |
422 | |
423 | for (const auto *B : Bases) { |
424 | const RecordType *BaseType = B->getType()->getAs<RecordType>(); |
425 | if (!BaseType) { |
426 | continue; |
427 | } |
428 | |
429 | CXXRecordDecl *BaseClassDecl = cast<CXXRecordDecl>(Val: BaseType->getDecl()); |
430 | Sema::SpecialMemberOverloadResult SMOR = |
431 | SemaRef.LookupSpecialMember(D: BaseClassDecl, SM: CSM, |
432 | /* ConstArg */ ConstRHS, |
433 | /* VolatileArg */ false, |
434 | /* RValueThis */ false, |
435 | /* ConstThis */ false, |
436 | /* VolatileThis */ false); |
437 | |
438 | if (!SMOR.getMethod()) |
439 | continue; |
440 | |
441 | CUDAFunctionTarget BaseMethodTarget = |
442 | IdentifyTarget(SMOR.getMethod(), IsExpVDtor); |
443 | |
444 | if (!InferredTarget) { |
445 | InferredTarget = BaseMethodTarget; |
446 | } else { |
447 | bool ResolutionError = resolveCalleeCUDATargetConflict( |
448 | Target1: *InferredTarget, Target2: BaseMethodTarget, ResolvedTarget: &*InferredTarget); |
449 | if (ResolutionError) { |
450 | if (Diagnose) { |
451 | Diag(ClassDecl->getLocation(), |
452 | diag::note_implicit_member_target_infer_collision) |
453 | << (unsigned)CSM << *InferredTarget << BaseMethodTarget; |
454 | } |
455 | MemberDecl->addAttr( |
456 | CUDAInvalidTargetAttr::CreateImplicit(getASTContext())); |
457 | return true; |
458 | } |
459 | } |
460 | } |
461 | |
462 | // Same as for bases, but now for special members of fields. |
463 | for (const auto *F : ClassDecl->fields()) { |
464 | if (F->isInvalidDecl()) { |
465 | continue; |
466 | } |
467 | |
468 | const RecordType *FieldType = |
469 | getASTContext().getBaseElementType(F->getType())->getAs<RecordType>(); |
470 | if (!FieldType) { |
471 | continue; |
472 | } |
473 | |
474 | CXXRecordDecl *FieldRecDecl = cast<CXXRecordDecl>(FieldType->getDecl()); |
475 | Sema::SpecialMemberOverloadResult SMOR = |
476 | SemaRef.LookupSpecialMember(FieldRecDecl, CSM, |
477 | /* ConstArg */ ConstRHS && !F->isMutable(), |
478 | /* VolatileArg */ false, |
479 | /* RValueThis */ false, |
480 | /* ConstThis */ false, |
481 | /* VolatileThis */ false); |
482 | |
483 | if (!SMOR.getMethod()) |
484 | continue; |
485 | |
486 | CUDAFunctionTarget FieldMethodTarget = |
487 | IdentifyTarget(SMOR.getMethod(), IsExpVDtor); |
488 | |
489 | if (!InferredTarget) { |
490 | InferredTarget = FieldMethodTarget; |
491 | } else { |
492 | bool ResolutionError = resolveCalleeCUDATargetConflict( |
493 | *InferredTarget, FieldMethodTarget, &*InferredTarget); |
494 | if (ResolutionError) { |
495 | if (Diagnose) { |
496 | Diag(ClassDecl->getLocation(), |
497 | diag::note_implicit_member_target_infer_collision) |
498 | << (unsigned)CSM << *InferredTarget << FieldMethodTarget; |
499 | } |
500 | MemberDecl->addAttr( |
501 | CUDAInvalidTargetAttr::CreateImplicit(getASTContext())); |
502 | return true; |
503 | } |
504 | } |
505 | } |
506 | |
507 | // If no target was inferred, mark this member as __host__ __device__; |
508 | // it's the least restrictive option that can be invoked from any target. |
509 | bool NeedsH = true, NeedsD = true; |
510 | if (InferredTarget) { |
511 | if (*InferredTarget == CUDAFunctionTarget::Device) |
512 | NeedsH = false; |
513 | else if (*InferredTarget == CUDAFunctionTarget::Host) |
514 | NeedsD = false; |
515 | } |
516 | |
517 | // We either setting attributes first time, or the inferred ones must match |
518 | // previously set ones. |
519 | if (NeedsD && !HasD) |
520 | MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(getASTContext())); |
521 | if (NeedsH && !HasH) |
522 | MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(getASTContext())); |
523 | |
524 | return false; |
525 | } |
526 | |
527 | bool SemaCUDA::isEmptyConstructor(SourceLocation Loc, CXXConstructorDecl *CD) { |
528 | if (!CD->isDefined() && CD->isTemplateInstantiation()) |
529 | SemaRef.InstantiateFunctionDefinition(PointOfInstantiation: Loc, Function: CD->getFirstDecl()); |
530 | |
531 | // (E.2.3.1, CUDA 7.5) A constructor for a class type is considered |
532 | // empty at a point in the translation unit, if it is either a |
533 | // trivial constructor |
534 | if (CD->isTrivial()) |
535 | return true; |
536 | |
537 | // ... or it satisfies all of the following conditions: |
538 | // The constructor function has been defined. |
539 | // The constructor function has no parameters, |
540 | // and the function body is an empty compound statement. |
541 | if (!(CD->hasTrivialBody() && CD->getNumParams() == 0)) |
542 | return false; |
543 | |
544 | // Its class has no virtual functions and no virtual base classes. |
545 | if (CD->getParent()->isDynamicClass()) |
546 | return false; |
547 | |
548 | // Union ctor does not call ctors of its data members. |
549 | if (CD->getParent()->isUnion()) |
550 | return true; |
551 | |
552 | // The only form of initializer allowed is an empty constructor. |
553 | // This will recursively check all base classes and member initializers |
554 | if (!llvm::all_of(Range: CD->inits(), P: [&](const CXXCtorInitializer *CI) { |
555 | if (const CXXConstructExpr *CE = |
556 | dyn_cast<CXXConstructExpr>(Val: CI->getInit())) |
557 | return isEmptyConstructor(Loc, CD: CE->getConstructor()); |
558 | return false; |
559 | })) |
560 | return false; |
561 | |
562 | return true; |
563 | } |
564 | |
565 | bool SemaCUDA::isEmptyDestructor(SourceLocation Loc, CXXDestructorDecl *DD) { |
566 | // No destructor -> no problem. |
567 | if (!DD) |
568 | return true; |
569 | |
570 | if (!DD->isDefined() && DD->isTemplateInstantiation()) |
571 | SemaRef.InstantiateFunctionDefinition(PointOfInstantiation: Loc, Function: DD->getFirstDecl()); |
572 | |
573 | // (E.2.3.1, CUDA 7.5) A destructor for a class type is considered |
574 | // empty at a point in the translation unit, if it is either a |
575 | // trivial constructor |
576 | if (DD->isTrivial()) |
577 | return true; |
578 | |
579 | // ... or it satisfies all of the following conditions: |
580 | // The destructor function has been defined. |
581 | // and the function body is an empty compound statement. |
582 | if (!DD->hasTrivialBody()) |
583 | return false; |
584 | |
585 | const CXXRecordDecl *ClassDecl = DD->getParent(); |
586 | |
587 | // Its class has no virtual functions and no virtual base classes. |
588 | if (ClassDecl->isDynamicClass()) |
589 | return false; |
590 | |
591 | // Union does not have base class and union dtor does not call dtors of its |
592 | // data members. |
593 | if (DD->getParent()->isUnion()) |
594 | return true; |
595 | |
596 | // Only empty destructors are allowed. This will recursively check |
597 | // destructors for all base classes... |
598 | if (!llvm::all_of(Range: ClassDecl->bases(), P: [&](const CXXBaseSpecifier &BS) { |
599 | if (CXXRecordDecl *RD = BS.getType()->getAsCXXRecordDecl()) |
600 | return isEmptyDestructor(Loc, DD: RD->getDestructor()); |
601 | return true; |
602 | })) |
603 | return false; |
604 | |
605 | // ... and member fields. |
606 | if (!llvm::all_of(ClassDecl->fields(), [&](const FieldDecl *Field) { |
607 | if (CXXRecordDecl *RD = Field->getType() |
608 | ->getBaseElementTypeUnsafe() |
609 | ->getAsCXXRecordDecl()) |
610 | return isEmptyDestructor(Loc, RD->getDestructor()); |
611 | return true; |
612 | })) |
613 | return false; |
614 | |
615 | return true; |
616 | } |
617 | |
618 | namespace { |
619 | enum CUDAInitializerCheckKind { |
620 | CICK_DeviceOrConstant, // Check initializer for device/constant variable |
621 | CICK_Shared, // Check initializer for shared variable |
622 | }; |
623 | |
624 | bool IsDependentVar(VarDecl *VD) { |
625 | if (VD->getType()->isDependentType()) |
626 | return true; |
627 | if (const auto *Init = VD->getInit()) |
628 | return Init->isValueDependent(); |
629 | return false; |
630 | } |
631 | |
632 | // Check whether a variable has an allowed initializer for a CUDA device side |
633 | // variable with global storage. \p VD may be a host variable to be checked for |
634 | // potential promotion to device side variable. |
635 | // |
636 | // CUDA/HIP allows only empty constructors as initializers for global |
637 | // variables (see E.2.3.1, CUDA 7.5). The same restriction also applies to all |
638 | // __shared__ variables whether they are local or not (they all are implicitly |
639 | // static in CUDA). One exception is that CUDA allows constant initializers |
640 | // for __constant__ and __device__ variables. |
641 | bool HasAllowedCUDADeviceStaticInitializer(SemaCUDA &S, VarDecl *VD, |
642 | CUDAInitializerCheckKind CheckKind) { |
643 | assert(!VD->isInvalidDecl() && VD->hasGlobalStorage()); |
644 | assert(!IsDependentVar(VD) && "do not check dependent var"); |
645 | const Expr *Init = VD->getInit(); |
646 | auto IsEmptyInit = [&](const Expr *Init) { |
647 | if (!Init) |
648 | return true; |
649 | if (const auto *CE = dyn_cast<CXXConstructExpr>(Val: Init)) { |
650 | return S.isEmptyConstructor(VD->getLocation(), CE->getConstructor()); |
651 | } |
652 | return false; |
653 | }; |
654 | auto IsConstantInit = [&](const Expr *Init) { |
655 | assert(Init); |
656 | ASTContext::CUDAConstantEvalContextRAII EvalCtx(S.getASTContext(), |
657 | /*NoWronSidedVars=*/true); |
658 | return Init->isConstantInitializer(S.getASTContext(), |
659 | VD->getType()->isReferenceType()); |
660 | }; |
661 | auto HasEmptyDtor = [&](VarDecl *VD) { |
662 | if (const auto *RD = VD->getType()->getAsCXXRecordDecl()) |
663 | return S.isEmptyDestructor(VD->getLocation(), RD->getDestructor()); |
664 | return true; |
665 | }; |
666 | if (CheckKind == CICK_Shared) |
667 | return IsEmptyInit(Init) && HasEmptyDtor(VD); |
668 | return S.getLangOpts().GPUAllowDeviceInit || |
669 | ((IsEmptyInit(Init) || IsConstantInit(Init)) && HasEmptyDtor(VD)); |
670 | } |
671 | } // namespace |
672 | |
673 | void SemaCUDA::checkAllowedInitializer(VarDecl *VD) { |
674 | // Return early if VD is inside a non-instantiated template function since |
675 | // the implicit constructor is not defined yet. |
676 | if (const FunctionDecl *FD = |
677 | dyn_cast_or_null<FunctionDecl>(VD->getDeclContext()); |
678 | FD && FD->isDependentContext()) |
679 | return; |
680 | |
681 | bool IsSharedVar = VD->hasAttr<CUDASharedAttr>(); |
682 | bool IsDeviceOrConstantVar = |
683 | !IsSharedVar && |
684 | (VD->hasAttr<CUDADeviceAttr>() || VD->hasAttr<CUDAConstantAttr>()); |
685 | if ((IsSharedVar || IsDeviceOrConstantVar) && |
686 | VD->getType().getQualifiers().getAddressSpace() != LangAS::Default) { |
687 | Diag(VD->getLocation(), diag::err_cuda_address_space_gpuvar); |
688 | VD->setInvalidDecl(); |
689 | return; |
690 | } |
691 | // Do not check dependent variables since the ctor/dtor/initializer are not |
692 | // determined. Do it after instantiation. |
693 | if (VD->isInvalidDecl() || !VD->hasInit() || !VD->hasGlobalStorage() || |
694 | IsDependentVar(VD)) |
695 | return; |
696 | const Expr *Init = VD->getInit(); |
697 | if (IsDeviceOrConstantVar || IsSharedVar) { |
698 | if (HasAllowedCUDADeviceStaticInitializer( |
699 | S&: *this, VD, CheckKind: IsSharedVar ? CICK_Shared : CICK_DeviceOrConstant)) |
700 | return; |
701 | Diag(VD->getLocation(), |
702 | IsSharedVar ? diag::err_shared_var_init : diag::err_dynamic_var_init) |
703 | << Init->getSourceRange(); |
704 | VD->setInvalidDecl(); |
705 | } else { |
706 | // This is a host-side global variable. Check that the initializer is |
707 | // callable from the host side. |
708 | const FunctionDecl *InitFn = nullptr; |
709 | if (const CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(Val: Init)) { |
710 | InitFn = CE->getConstructor(); |
711 | } else if (const CallExpr *CE = dyn_cast<CallExpr>(Val: Init)) { |
712 | InitFn = CE->getDirectCallee(); |
713 | } |
714 | if (InitFn) { |
715 | CUDAFunctionTarget InitFnTarget = IdentifyTarget(D: InitFn); |
716 | if (InitFnTarget != CUDAFunctionTarget::Host && |
717 | InitFnTarget != CUDAFunctionTarget::HostDevice) { |
718 | Diag(VD->getLocation(), diag::err_ref_bad_target_global_initializer) |
719 | << InitFnTarget << InitFn; |
720 | Diag(InitFn->getLocation(), diag::note_previous_decl) << InitFn; |
721 | VD->setInvalidDecl(); |
722 | } |
723 | } |
724 | } |
725 | } |
726 | |
727 | void SemaCUDA::RecordImplicitHostDeviceFuncUsedByDevice( |
728 | const FunctionDecl *Callee) { |
729 | FunctionDecl *Caller = SemaRef.getCurFunctionDecl(/*AllowLambda=*/true); |
730 | if (!Caller) |
731 | return; |
732 | |
733 | if (!isImplicitHostDeviceFunction(D: Callee)) |
734 | return; |
735 | |
736 | CUDAFunctionTarget CallerTarget = IdentifyTarget(D: Caller); |
737 | |
738 | // Record whether an implicit host device function is used on device side. |
739 | if (CallerTarget != CUDAFunctionTarget::Device && |
740 | CallerTarget != CUDAFunctionTarget::Global && |
741 | (CallerTarget != CUDAFunctionTarget::HostDevice || |
742 | (isImplicitHostDeviceFunction(D: Caller) && |
743 | !getASTContext().CUDAImplicitHostDeviceFunUsedByDevice.count(Caller)))) |
744 | return; |
745 | |
746 | getASTContext().CUDAImplicitHostDeviceFunUsedByDevice.insert(Callee); |
747 | } |
748 | |
749 | // With -fcuda-host-device-constexpr, an unattributed constexpr function is |
750 | // treated as implicitly __host__ __device__, unless: |
751 | // * it is a variadic function (device-side variadic functions are not |
752 | // allowed), or |
753 | // * a __device__ function with this signature was already declared, in which |
//   case we output an error, unless the __device__ decl is in a
755 | // system header, in which case we leave the constexpr function unattributed. |
756 | // |
757 | // In addition, all function decls are treated as __host__ __device__ when |
758 | // ForceHostDeviceDepth > 0 (corresponding to code within a |
759 | // #pragma clang force_cuda_host_device_begin/end |
760 | // pair). |
761 | void SemaCUDA::maybeAddHostDeviceAttrs(FunctionDecl *NewD, |
762 | const LookupResult &Previous) { |
763 | assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); |
764 | |
765 | if (ForceHostDeviceDepth > 0) { |
766 | if (!NewD->hasAttr<CUDAHostAttr>()) |
767 | NewD->addAttr(CUDAHostAttr::CreateImplicit(getASTContext())); |
768 | if (!NewD->hasAttr<CUDADeviceAttr>()) |
769 | NewD->addAttr(CUDADeviceAttr::CreateImplicit(getASTContext())); |
770 | return; |
771 | } |
772 | |
773 | // If a template function has no host/device/global attributes, |
774 | // make it implicitly host device function. |
775 | if (getLangOpts().OffloadImplicitHostDeviceTemplates && |
776 | !NewD->hasAttr<CUDAHostAttr>() && !NewD->hasAttr<CUDADeviceAttr>() && |
777 | !NewD->hasAttr<CUDAGlobalAttr>() && |
778 | (NewD->getDescribedFunctionTemplate() || |
779 | NewD->isFunctionTemplateSpecialization())) { |
780 | NewD->addAttr(CUDAHostAttr::CreateImplicit(getASTContext())); |
781 | NewD->addAttr(CUDADeviceAttr::CreateImplicit(getASTContext())); |
782 | return; |
783 | } |
784 | |
785 | if (!getLangOpts().CUDAHostDeviceConstexpr || !NewD->isConstexpr() || |
786 | NewD->isVariadic() || NewD->hasAttr<CUDAHostAttr>() || |
787 | NewD->hasAttr<CUDADeviceAttr>() || NewD->hasAttr<CUDAGlobalAttr>()) |
788 | return; |
789 | |
790 | // Is D a __device__ function with the same signature as NewD, ignoring CUDA |
791 | // attributes? |
792 | auto IsMatchingDeviceFn = [&](NamedDecl *D) { |
793 | if (UsingShadowDecl *Using = dyn_cast<UsingShadowDecl>(Val: D)) |
794 | D = Using->getTargetDecl(); |
795 | FunctionDecl *OldD = D->getAsFunction(); |
796 | return OldD && OldD->hasAttr<CUDADeviceAttr>() && |
797 | !OldD->hasAttr<CUDAHostAttr>() && |
798 | !SemaRef.IsOverload(NewD, OldD, |
799 | /* UseMemberUsingDeclRules = */ false, |
800 | /* ConsiderCudaAttrs = */ false); |
801 | }; |
802 | auto It = llvm::find_if(Range: Previous, P: IsMatchingDeviceFn); |
803 | if (It != Previous.end()) { |
804 | // We found a __device__ function with the same name and signature as NewD |
805 | // (ignoring CUDA attrs). This is an error unless that function is defined |
806 | // in a system header, in which case we simply return without making NewD |
807 | // host+device. |
808 | NamedDecl *Match = *It; |
809 | if (!SemaRef.getSourceManager().isInSystemHeader(Loc: Match->getLocation())) { |
810 | Diag(NewD->getLocation(), |
811 | diag::err_cuda_unattributed_constexpr_cannot_overload_device) |
812 | << NewD; |
813 | Diag(Match->getLocation(), |
814 | diag::note_cuda_conflicting_device_function_declared_here); |
815 | } |
816 | return; |
817 | } |
818 | |
819 | NewD->addAttr(CUDAHostAttr::CreateImplicit(getASTContext())); |
820 | NewD->addAttr(CUDADeviceAttr::CreateImplicit(getASTContext())); |
821 | } |
822 | |
823 | // TODO: `__constant__` memory may be a limited resource for certain targets. |
824 | // A safeguard may be needed at the end of compilation pipeline if |
825 | // `__constant__` memory usage goes beyond limit. |
826 | void SemaCUDA::MaybeAddConstantAttr(VarDecl *VD) { |
827 | // Do not promote dependent variables since the cotr/dtor/initializer are |
828 | // not determined. Do it after instantiation. |
829 | if (getLangOpts().CUDAIsDevice && !VD->hasAttr<CUDAConstantAttr>() && |
830 | !VD->hasAttr<CUDASharedAttr>() && |
831 | (VD->isFileVarDecl() || VD->isStaticDataMember()) && |
832 | !IsDependentVar(VD) && |
833 | ((VD->isConstexpr() || VD->getType().isConstQualified()) && |
834 | HasAllowedCUDADeviceStaticInitializer(*this, VD, |
835 | CICK_DeviceOrConstant))) { |
836 | VD->addAttr(CUDAConstantAttr::CreateImplicit(getASTContext())); |
837 | } |
838 | } |
839 | |
840 | SemaBase::SemaDiagnosticBuilder SemaCUDA::DiagIfDeviceCode(SourceLocation Loc, |
841 | unsigned DiagID) { |
842 | assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); |
843 | FunctionDecl *CurFunContext = |
844 | SemaRef.getCurFunctionDecl(/*AllowLambda=*/true); |
845 | SemaDiagnosticBuilder::Kind DiagKind = [&] { |
846 | if (!CurFunContext) |
847 | return SemaDiagnosticBuilder::K_Nop; |
848 | switch (CurrentTarget()) { |
849 | case CUDAFunctionTarget::Global: |
850 | case CUDAFunctionTarget::Device: |
851 | return SemaDiagnosticBuilder::K_Immediate; |
852 | case CUDAFunctionTarget::HostDevice: |
853 | // An HD function counts as host code if we're compiling for host, and |
854 | // device code if we're compiling for device. Defer any errors in device |
855 | // mode until the function is known-emitted. |
856 | if (!getLangOpts().CUDAIsDevice) |
857 | return SemaDiagnosticBuilder::K_Nop; |
858 | if (SemaRef.IsLastErrorImmediate && |
859 | getDiagnostics().getDiagnosticIDs()->isNote(DiagID)) |
860 | return SemaDiagnosticBuilder::K_Immediate; |
861 | return (SemaRef.getEmissionStatus(Decl: CurFunContext) == |
862 | Sema::FunctionEmissionStatus::Emitted) |
863 | ? SemaDiagnosticBuilder::K_ImmediateWithCallStack |
864 | : SemaDiagnosticBuilder::K_Deferred; |
865 | default: |
866 | return SemaDiagnosticBuilder::K_Nop; |
867 | } |
868 | }(); |
869 | return SemaDiagnosticBuilder(DiagKind, Loc, DiagID, CurFunContext, SemaRef); |
870 | } |
871 | |
872 | Sema::SemaDiagnosticBuilder SemaCUDA::DiagIfHostCode(SourceLocation Loc, |
873 | unsigned DiagID) { |
874 | assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); |
875 | FunctionDecl *CurFunContext = |
876 | SemaRef.getCurFunctionDecl(/*AllowLambda=*/true); |
877 | SemaDiagnosticBuilder::Kind DiagKind = [&] { |
878 | if (!CurFunContext) |
879 | return SemaDiagnosticBuilder::K_Nop; |
880 | switch (CurrentTarget()) { |
881 | case CUDAFunctionTarget::Host: |
882 | return SemaDiagnosticBuilder::K_Immediate; |
883 | case CUDAFunctionTarget::HostDevice: |
884 | // An HD function counts as host code if we're compiling for host, and |
885 | // device code if we're compiling for device. Defer any errors in device |
886 | // mode until the function is known-emitted. |
887 | if (getLangOpts().CUDAIsDevice) |
888 | return SemaDiagnosticBuilder::K_Nop; |
889 | if (SemaRef.IsLastErrorImmediate && |
890 | getDiagnostics().getDiagnosticIDs()->isNote(DiagID)) |
891 | return SemaDiagnosticBuilder::K_Immediate; |
892 | return (SemaRef.getEmissionStatus(Decl: CurFunContext) == |
893 | Sema::FunctionEmissionStatus::Emitted) |
894 | ? SemaDiagnosticBuilder::K_ImmediateWithCallStack |
895 | : SemaDiagnosticBuilder::K_Deferred; |
896 | default: |
897 | return SemaDiagnosticBuilder::K_Nop; |
898 | } |
899 | }(); |
900 | return SemaDiagnosticBuilder(DiagKind, Loc, DiagID, CurFunContext, SemaRef); |
901 | } |
902 | |
903 | bool SemaCUDA::CheckCall(SourceLocation Loc, FunctionDecl *Callee) { |
904 | assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); |
905 | assert(Callee && "Callee may not be null."); |
906 | |
907 | const auto &ExprEvalCtx = SemaRef.currentEvaluationContext(); |
908 | if (ExprEvalCtx.isUnevaluated() || ExprEvalCtx.isConstantEvaluated()) |
909 | return true; |
910 | |
911 | // FIXME: Is bailing out early correct here? Should we instead assume that |
912 | // the caller is a global initializer? |
913 | FunctionDecl *Caller = SemaRef.getCurFunctionDecl(/*AllowLambda=*/true); |
914 | if (!Caller) |
915 | return true; |
916 | |
917 | // If the caller is known-emitted, mark the callee as known-emitted. |
918 | // Otherwise, mark the call in our call graph so we can traverse it later. |
919 | bool CallerKnownEmitted = SemaRef.getEmissionStatus(Decl: Caller) == |
920 | Sema::FunctionEmissionStatus::Emitted; |
921 | SemaDiagnosticBuilder::Kind DiagKind = [this, Caller, Callee, |
922 | CallerKnownEmitted] { |
923 | switch (IdentifyPreference(Caller, Callee)) { |
924 | case CFP_Never: |
925 | case CFP_WrongSide: |
926 | assert(Caller && "Never/wrongSide calls require a non-null caller"); |
927 | // If we know the caller will be emitted, we know this wrong-side call |
928 | // will be emitted, so it's an immediate error. Otherwise, defer the |
929 | // error until we know the caller is emitted. |
930 | return CallerKnownEmitted |
931 | ? SemaDiagnosticBuilder::K_ImmediateWithCallStack |
932 | : SemaDiagnosticBuilder::K_Deferred; |
933 | default: |
934 | return SemaDiagnosticBuilder::K_Nop; |
935 | } |
936 | }(); |
937 | |
938 | if (DiagKind == SemaDiagnosticBuilder::K_Nop) { |
939 | // For -fgpu-rdc, keep track of external kernels used by host functions. |
940 | if (getLangOpts().CUDAIsDevice && getLangOpts().GPURelocatableDeviceCode && |
941 | Callee->hasAttr<CUDAGlobalAttr>() && !Callee->isDefined() && |
942 | (!Caller || (!Caller->getDescribedFunctionTemplate() && |
943 | getASTContext().GetGVALinkageForFunction(Caller) == |
944 | GVA_StrongExternal))) |
945 | getASTContext().CUDAExternalDeviceDeclODRUsedByHost.insert(Callee); |
946 | return true; |
947 | } |
948 | |
949 | // Avoid emitting this error twice for the same location. Using a hashtable |
950 | // like this is unfortunate, but because we must continue parsing as normal |
951 | // after encountering a deferred error, it's otherwise very tricky for us to |
952 | // ensure that we only emit this deferred error once. |
953 | if (!LocsWithCUDACallDiags.insert(V: {.FD: Caller, .Loc: Loc}).second) |
954 | return true; |
955 | |
956 | SemaDiagnosticBuilder(DiagKind, Loc, diag::err_ref_bad_target, Caller, |
957 | SemaRef) |
958 | << IdentifyTarget(Callee) << /*function*/ 0 << Callee |
959 | << IdentifyTarget(Caller); |
960 | if (!Callee->getBuiltinID()) |
961 | SemaDiagnosticBuilder(DiagKind, Callee->getLocation(), |
962 | diag::note_previous_decl, Caller, SemaRef) |
963 | << Callee; |
964 | return DiagKind != SemaDiagnosticBuilder::K_Immediate && |
965 | DiagKind != SemaDiagnosticBuilder::K_ImmediateWithCallStack; |
966 | } |
967 | |
968 | // Check the wrong-sided reference capture of lambda for CUDA/HIP. |
969 | // A lambda function may capture a stack variable by reference when it is |
970 | // defined and uses the capture by reference when the lambda is called. When |
971 | // the capture and use happen on different sides, the capture is invalid and |
972 | // should be diagnosed. |
973 | void SemaCUDA::CheckLambdaCapture(CXXMethodDecl *Callee, |
974 | const sema::Capture &Capture) { |
975 | // In host compilation we only need to check lambda functions emitted on host |
976 | // side. In such lambda functions, a reference capture is invalid only |
977 | // if the lambda structure is populated by a device function or kernel then |
978 | // is passed to and called by a host function. However that is impossible, |
979 | // since a device function or kernel can only call a device function, also a |
980 | // kernel cannot pass a lambda back to a host function since we cannot |
981 | // define a kernel argument type which can hold the lambda before the lambda |
982 | // itself is defined. |
983 | if (!getLangOpts().CUDAIsDevice) |
984 | return; |
985 | |
986 | // File-scope lambda can only do init captures for global variables, which |
987 | // results in passing by value for these global variables. |
988 | FunctionDecl *Caller = SemaRef.getCurFunctionDecl(/*AllowLambda=*/true); |
989 | if (!Caller) |
990 | return; |
991 | |
992 | // In device compilation, we only need to check lambda functions which are |
993 | // emitted on device side. For such lambdas, a reference capture is invalid |
994 | // only if the lambda structure is populated by a host function then passed |
995 | // to and called in a device function or kernel. |
996 | bool CalleeIsDevice = Callee->hasAttr<CUDADeviceAttr>(); |
997 | bool CallerIsHost = |
998 | !Caller->hasAttr<CUDAGlobalAttr>() && !Caller->hasAttr<CUDADeviceAttr>(); |
999 | bool ShouldCheck = CalleeIsDevice && CallerIsHost; |
1000 | if (!ShouldCheck || !Capture.isReferenceCapture()) |
1001 | return; |
1002 | auto DiagKind = SemaDiagnosticBuilder::K_Deferred; |
1003 | if (Capture.isVariableCapture() && !getLangOpts().HIPStdPar) { |
1004 | SemaDiagnosticBuilder(DiagKind, Capture.getLocation(), |
1005 | diag::err_capture_bad_target, Callee, SemaRef) |
1006 | << Capture.getVariable(); |
1007 | } else if (Capture.isThisCapture()) { |
1008 | // Capture of this pointer is allowed since this pointer may be pointing to |
1009 | // managed memory which is accessible on both device and host sides. It only |
1010 | // results in invalid memory access if this pointer points to memory not |
1011 | // accessible on device side. |
1012 | SemaDiagnosticBuilder(DiagKind, Capture.getLocation(), |
1013 | diag::warn_maybe_capture_bad_target_this_ptr, Callee, |
1014 | SemaRef); |
1015 | } |
1016 | } |
1017 | |
1018 | void SemaCUDA::SetLambdaAttrs(CXXMethodDecl *Method) { |
1019 | assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); |
1020 | if (Method->hasAttr<CUDAHostAttr>() || Method->hasAttr<CUDADeviceAttr>()) |
1021 | return; |
1022 | Method->addAttr(CUDADeviceAttr::CreateImplicit(getASTContext())); |
1023 | Method->addAttr(CUDAHostAttr::CreateImplicit(getASTContext())); |
1024 | } |
1025 | |
1026 | void SemaCUDA::checkTargetOverload(FunctionDecl *NewFD, |
1027 | const LookupResult &Previous) { |
1028 | assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); |
1029 | CUDAFunctionTarget NewTarget = IdentifyTarget(D: NewFD); |
1030 | for (NamedDecl *OldND : Previous) { |
1031 | FunctionDecl *OldFD = OldND->getAsFunction(); |
1032 | if (!OldFD) |
1033 | continue; |
1034 | |
1035 | CUDAFunctionTarget OldTarget = IdentifyTarget(D: OldFD); |
1036 | // Don't allow HD and global functions to overload other functions with the |
1037 | // same signature. We allow overloading based on CUDA attributes so that |
1038 | // functions can have different implementations on the host and device, but |
1039 | // HD/global functions "exist" in some sense on both the host and device, so |
1040 | // should have the same implementation on both sides. |
1041 | if (NewTarget != OldTarget && |
1042 | !SemaRef.IsOverload(New: NewFD, Old: OldFD, /* UseMemberUsingDeclRules = */ false, |
1043 | /* ConsiderCudaAttrs = */ false)) { |
1044 | if ((NewTarget == CUDAFunctionTarget::HostDevice && |
1045 | !(getLangOpts().OffloadImplicitHostDeviceTemplates && |
1046 | isImplicitHostDeviceFunction(D: NewFD) && |
1047 | OldTarget == CUDAFunctionTarget::Device)) || |
1048 | (OldTarget == CUDAFunctionTarget::HostDevice && |
1049 | !(getLangOpts().OffloadImplicitHostDeviceTemplates && |
1050 | isImplicitHostDeviceFunction(D: OldFD) && |
1051 | NewTarget == CUDAFunctionTarget::Device)) || |
1052 | (NewTarget == CUDAFunctionTarget::Global) || |
1053 | (OldTarget == CUDAFunctionTarget::Global)) { |
1054 | Diag(NewFD->getLocation(), diag::err_cuda_ovl_target) |
1055 | << NewTarget << NewFD->getDeclName() << OldTarget << OldFD; |
1056 | Diag(OldFD->getLocation(), diag::note_previous_declaration); |
1057 | NewFD->setInvalidDecl(); |
1058 | break; |
1059 | } |
1060 | if ((NewTarget == CUDAFunctionTarget::Host && |
1061 | OldTarget == CUDAFunctionTarget::Device) || |
1062 | (NewTarget == CUDAFunctionTarget::Device && |
1063 | OldTarget == CUDAFunctionTarget::Host)) { |
1064 | Diag(NewFD->getLocation(), diag::warn_offload_incompatible_redeclare) |
1065 | << NewTarget << OldTarget; |
1066 | Diag(OldFD->getLocation(), diag::note_previous_declaration); |
1067 | } |
1068 | } |
1069 | } |
1070 | } |
1071 | |
1072 | template <typename AttrTy> |
1073 | static void copyAttrIfPresent(Sema &S, FunctionDecl *FD, |
1074 | const FunctionDecl &TemplateFD) { |
1075 | if (AttrTy *Attribute = TemplateFD.getAttr<AttrTy>()) { |
1076 | AttrTy *Clone = Attribute->clone(S.Context); |
1077 | Clone->setInherited(true); |
1078 | FD->addAttr(A: Clone); |
1079 | } |
1080 | } |
1081 | |
1082 | void SemaCUDA::inheritTargetAttrs(FunctionDecl *FD, |
1083 | const FunctionTemplateDecl &TD) { |
1084 | const FunctionDecl &TemplateFD = *TD.getTemplatedDecl(); |
1085 | copyAttrIfPresent<CUDAGlobalAttr>(SemaRef, FD, TemplateFD); |
1086 | copyAttrIfPresent<CUDAHostAttr>(SemaRef, FD, TemplateFD); |
1087 | copyAttrIfPresent<CUDADeviceAttr>(SemaRef, FD, TemplateFD); |
1088 | } |
1089 | |
1090 | std::string SemaCUDA::getConfigureFuncName() const { |
1091 | if (getLangOpts().OffloadViaLLVM) |
1092 | return "__llvmPushCallConfiguration"; |
1093 | |
1094 | if (getLangOpts().HIP) |
1095 | return getLangOpts().HIPUseNewLaunchAPI ? "__hipPushCallConfiguration" |
1096 | : "hipConfigureCall"; |
1097 | |
1098 | // New CUDA kernel launch sequence. |
1099 | if (CudaFeatureEnabled(getASTContext().getTargetInfo().getSDKVersion(), |
1100 | CudaFeature::CUDA_USES_NEW_LAUNCH)) |
1101 | return "__cudaPushCallConfiguration"; |
1102 | |
1103 | // Legacy CUDA kernel configuration call |
1104 | return "cudaConfigureCall"; |
1105 | } |
1106 | |
1107 | // Record any local constexpr variables that are passed one way on the host |
1108 | // and another on the device. |
1109 | void SemaCUDA::recordPotentialODRUsedVariable( |
1110 | MultiExprArg Arguments, OverloadCandidateSet &Candidates) { |
1111 | sema::LambdaScopeInfo *LambdaInfo = SemaRef.getCurLambda(); |
1112 | if (!LambdaInfo) |
1113 | return; |
1114 | |
1115 | for (unsigned I = 0; I < Arguments.size(); ++I) { |
1116 | auto *DeclRef = dyn_cast<DeclRefExpr>(Val: Arguments[I]); |
1117 | if (!DeclRef) |
1118 | continue; |
1119 | auto *Variable = dyn_cast<VarDecl>(Val: DeclRef->getDecl()); |
1120 | if (!Variable || !Variable->isLocalVarDecl() || !Variable->isConstexpr()) |
1121 | continue; |
1122 | |
1123 | bool HostByValue = false, HostByRef = false; |
1124 | bool DeviceByValue = false, DeviceByRef = false; |
1125 | |
1126 | for (OverloadCandidate &Candidate : Candidates) { |
1127 | FunctionDecl *Callee = Candidate.Function; |
1128 | if (!Callee || I >= Callee->getNumParams()) |
1129 | continue; |
1130 | |
1131 | CUDAFunctionTarget Target = IdentifyTarget(D: Callee); |
1132 | if (Target == CUDAFunctionTarget::InvalidTarget || |
1133 | Target == CUDAFunctionTarget::Global) |
1134 | continue; |
1135 | |
1136 | bool CoversHost = (Target == CUDAFunctionTarget::Host || |
1137 | Target == CUDAFunctionTarget::HostDevice); |
1138 | bool CoversDevice = (Target == CUDAFunctionTarget::Device || |
1139 | Target == CUDAFunctionTarget::HostDevice); |
1140 | |
1141 | bool IsRef = Callee->getParamDecl(i: I)->getType()->isReferenceType(); |
1142 | HostByValue |= CoversHost && !IsRef; |
1143 | HostByRef |= CoversHost && IsRef; |
1144 | DeviceByValue |= CoversDevice && !IsRef; |
1145 | DeviceByRef |= CoversDevice && IsRef; |
1146 | } |
1147 | |
1148 | if ((HostByValue && DeviceByRef) || (HostByRef && DeviceByValue)) |
1149 | LambdaInfo->CUDAPotentialODRUsedVars.insert(Ptr: Variable); |
1150 | } |
1151 | } |
1152 |
Definitions
- SemaCUDA
- hasExplicitAttr
- PushForceHostDevice
- PopForceHostDevice
- ActOnExecConfigExpr
- IdentifyTarget
- hasAttr
- CUDATargetContextRAII
- IdentifyTarget
- IdentifyTarget
- IdentifyPreference
- hasImplicitAttr
- isImplicitHostDeviceFunction
- EraseUnwantedMatches
- resolveCalleeCUDATargetConflict
- inferTargetForImplicitSpecialMember
- isEmptyConstructor
- isEmptyDestructor
- CUDAInitializerCheckKind
- IsDependentVar
- HasAllowedCUDADeviceStaticInitializer
- checkAllowedInitializer
- RecordImplicitHostDeviceFuncUsedByDevice
- maybeAddHostDeviceAttrs
- MaybeAddConstantAttr
- DiagIfDeviceCode
- DiagIfHostCode
- CheckCall
- CheckLambdaCapture
- SetLambdaAttrs
- checkTargetOverload
- copyAttrIfPresent
- inheritTargetAttrs
- getConfigureFuncName
Improve your Profiling and Debugging skills
Find out more