1 | //===--- SemaCUDA.cpp - Semantic Analysis for CUDA constructs -------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// \file |
9 | /// This file implements semantic analysis for CUDA constructs. |
10 | /// |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "clang/AST/ASTContext.h" |
14 | #include "clang/AST/Decl.h" |
15 | #include "clang/AST/ExprCXX.h" |
16 | #include "clang/Basic/Cuda.h" |
17 | #include "clang/Basic/TargetInfo.h" |
18 | #include "clang/Lex/Preprocessor.h" |
19 | #include "clang/Sema/Lookup.h" |
20 | #include "clang/Sema/ScopeInfo.h" |
21 | #include "clang/Sema/Sema.h" |
22 | #include "clang/Sema/SemaDiagnostic.h" |
23 | #include "clang/Sema/SemaInternal.h" |
24 | #include "clang/Sema/Template.h" |
25 | #include "llvm/ADT/SmallVector.h" |
26 | #include <optional> |
27 | using namespace clang; |
28 | |
29 | template <typename AttrT> static bool hasExplicitAttr(const VarDecl *D) { |
30 | if (!D) |
31 | return false; |
32 | if (auto *A = D->getAttr<AttrT>()) |
33 | return !A->isImplicit(); |
34 | return false; |
35 | } |
36 | |
37 | void Sema::PushForceCUDAHostDevice() { |
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
39 | ForceCUDAHostDeviceDepth++; |
40 | } |
41 | |
42 | bool Sema::PopForceCUDAHostDevice() { |
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
44 | if (ForceCUDAHostDeviceDepth == 0) |
45 | return false; |
46 | ForceCUDAHostDeviceDepth--; |
47 | return true; |
48 | } |
49 | |
50 | ExprResult Sema::ActOnCUDAExecConfigExpr(Scope *S, SourceLocation LLLLoc, |
51 | MultiExprArg ExecConfig, |
52 | SourceLocation GGGLoc) { |
53 | FunctionDecl *ConfigDecl = Context.getcudaConfigureCallDecl(); |
54 | if (!ConfigDecl) |
55 | return ExprError(Diag(LLLLoc, diag::err_undeclared_var_use) |
56 | << getCudaConfigureFuncName()); |
57 | QualType ConfigQTy = ConfigDecl->getType(); |
58 | |
59 | DeclRefExpr *ConfigDR = new (Context) |
60 | DeclRefExpr(Context, ConfigDecl, false, ConfigQTy, VK_LValue, LLLLoc); |
  MarkFunctionReferenced(LLLLoc, ConfigDecl);
62 | |
63 | return BuildCallExpr(S, ConfigDR, LLLLoc, ExecConfig, GGGLoc, nullptr, |
64 | /*IsExecConfig=*/true); |
65 | } |
66 | |
67 | Sema::CUDAFunctionTarget |
68 | Sema::IdentifyCUDATarget(const ParsedAttributesView &Attrs) { |
69 | bool HasHostAttr = false; |
70 | bool HasDeviceAttr = false; |
71 | bool HasGlobalAttr = false; |
72 | bool HasInvalidTargetAttr = false; |
73 | for (const ParsedAttr &AL : Attrs) { |
74 | switch (AL.getKind()) { |
75 | case ParsedAttr::AT_CUDAGlobal: |
76 | HasGlobalAttr = true; |
77 | break; |
78 | case ParsedAttr::AT_CUDAHost: |
79 | HasHostAttr = true; |
80 | break; |
81 | case ParsedAttr::AT_CUDADevice: |
82 | HasDeviceAttr = true; |
83 | break; |
84 | case ParsedAttr::AT_CUDAInvalidTarget: |
85 | HasInvalidTargetAttr = true; |
86 | break; |
87 | default: |
88 | break; |
89 | } |
90 | } |
91 | |
92 | if (HasInvalidTargetAttr) |
93 | return CFT_InvalidTarget; |
94 | |
95 | if (HasGlobalAttr) |
96 | return CFT_Global; |
97 | |
98 | if (HasHostAttr && HasDeviceAttr) |
99 | return CFT_HostDevice; |
100 | |
101 | if (HasDeviceAttr) |
102 | return CFT_Device; |
103 | |
104 | return CFT_Host; |
105 | } |
106 | |
107 | template <typename A> |
108 | static bool hasAttr(const Decl *D, bool IgnoreImplicitAttr) { |
109 | return D->hasAttrs() && llvm::any_of(D->getAttrs(), [&](Attr *Attribute) { |
110 | return isa<A>(Attribute) && |
111 | !(IgnoreImplicitAttr && Attribute->isImplicit()); |
112 | }); |
113 | } |
114 | |
Sema::CUDATargetContextRAII::CUDATargetContextRAII(Sema &S_,
                                                   CUDATargetContextKind K,
                                                   Decl *D)
    : S(S_) {
119 | SavedCtx = S.CurCUDATargetCtx; |
120 | assert(K == CTCK_InitGlobalVar); |
  auto *VD = dyn_cast_or_null<VarDecl>(D);
122 | if (VD && VD->hasGlobalStorage() && !VD->isStaticLocal()) { |
123 | auto Target = CFT_Host; |
124 | if ((hasAttr<CUDADeviceAttr>(VD, /*IgnoreImplicit=*/true) && |
125 | !hasAttr<CUDAHostAttr>(VD, /*IgnoreImplicit=*/true)) || |
126 | hasAttr<CUDASharedAttr>(VD, /*IgnoreImplicit=*/true) || |
127 | hasAttr<CUDAConstantAttr>(VD, /*IgnoreImplicit=*/true)) |
128 | Target = CFT_Device; |
129 | S.CurCUDATargetCtx = {Target, K, VD}; |
130 | } |
131 | } |
132 | |
/// IdentifyCUDATarget - Determine the CUDA compilation target for this
/// function.
134 | Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D, |
135 | bool IgnoreImplicitHDAttr) { |
136 | // Code that lives outside a function gets the target from CurCUDATargetCtx. |
137 | if (D == nullptr) |
138 | return CurCUDATargetCtx.Target; |
139 | |
140 | if (D->hasAttr<CUDAInvalidTargetAttr>()) |
141 | return CFT_InvalidTarget; |
142 | |
143 | if (D->hasAttr<CUDAGlobalAttr>()) |
144 | return CFT_Global; |
145 | |
146 | if (hasAttr<CUDADeviceAttr>(D, IgnoreImplicitHDAttr)) { |
147 | if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr)) |
148 | return CFT_HostDevice; |
149 | return CFT_Device; |
150 | } else if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr)) { |
151 | return CFT_Host; |
152 | } else if ((D->isImplicit() || !D->isUserProvided()) && |
153 | !IgnoreImplicitHDAttr) { |
154 | // Some implicit declarations (like intrinsic functions) are not marked. |
155 | // Set the most lenient target on them for maximal flexibility. |
156 | return CFT_HostDevice; |
157 | } |
158 | |
159 | return CFT_Host; |
160 | } |
161 | |
/// IdentifyCUDATarget - Determine the CUDA compilation target for this
/// variable.
163 | Sema::CUDAVariableTarget Sema::IdentifyCUDATarget(const VarDecl *Var) { |
164 | if (Var->hasAttr<HIPManagedAttr>()) |
165 | return CVT_Unified; |
  // Only constexpr and const variables with implicit constant attribute
  // are emitted on both sides. Such variables are promoted to device side
  // only if they have static constant initializers on device side.
169 | if ((Var->isConstexpr() || Var->getType().isConstQualified()) && |
170 | Var->hasAttr<CUDAConstantAttr>() && |
171 | !hasExplicitAttr<CUDAConstantAttr>(Var)) |
172 | return CVT_Both; |
173 | if (Var->hasAttr<CUDADeviceAttr>() || Var->hasAttr<CUDAConstantAttr>() || |
174 | Var->hasAttr<CUDASharedAttr>() || |
175 | Var->getType()->isCUDADeviceBuiltinSurfaceType() || |
176 | Var->getType()->isCUDADeviceBuiltinTextureType()) |
177 | return CVT_Device; |
  // Function-scope static variables without an explicit device or constant
  // attribute are emitted
180 | // - on both sides in host device functions |
181 | // - on device side in device or global functions |
182 | if (auto *FD = dyn_cast<FunctionDecl>(Var->getDeclContext())) { |
183 | switch (IdentifyCUDATarget(FD)) { |
184 | case CFT_HostDevice: |
185 | return CVT_Both; |
186 | case CFT_Device: |
187 | case CFT_Global: |
188 | return CVT_Device; |
189 | default: |
190 | return CVT_Host; |
191 | } |
192 | } |
193 | return CVT_Host; |
194 | } |
195 | |
196 | // * CUDA Call preference table |
197 | // |
198 | // F - from, |
199 | // T - to |
200 | // Ph - preference in host mode |
201 | // Pd - preference in device mode |
202 | // H - handled in (x) |
203 | // Preferences: N:native, SS:same side, HD:host-device, WS:wrong side, --:never. |
204 | // |
205 | // | F | T | Ph | Pd | H | |
206 | // |----+----+-----+-----+-----+ |
207 | // | d | d | N | N | (c) | |
208 | // | d | g | -- | -- | (a) | |
209 | // | d | h | -- | -- | (e) | |
210 | // | d | hd | HD | HD | (b) | |
211 | // | g | d | N | N | (c) | |
212 | // | g | g | -- | -- | (a) | |
213 | // | g | h | -- | -- | (e) | |
214 | // | g | hd | HD | HD | (b) | |
215 | // | h | d | -- | -- | (e) | |
216 | // | h | g | N | N | (c) | |
217 | // | h | h | N | N | (c) | |
218 | // | h | hd | HD | HD | (b) | |
219 | // | hd | d | WS | SS | (d) | |
220 | // | hd | g | SS | -- |(d/a)| |
221 | // | hd | h | SS | WS | (d) | |
222 | // | hd | hd | HD | HD | (b) | |
223 | |
224 | Sema::CUDAFunctionPreference |
225 | Sema::IdentifyCUDAPreference(const FunctionDecl *Caller, |
226 | const FunctionDecl *Callee) { |
227 | assert(Callee && "Callee must be valid." ); |
228 | |
  // Treat ctor/dtor as host-device functions in device variable initializers,
  // so that trivial ctors/dtors without a device attribute can be used.
  // Non-trivial ctors/dtors will be diagnosed by checkAllowedCUDAInitializer.
232 | if (Caller == nullptr && CurCUDATargetCtx.Kind == CTCK_InitGlobalVar && |
233 | CurCUDATargetCtx.Target == CFT_Device && |
      (isa<CXXConstructorDecl>(Callee) || isa<CXXDestructorDecl>(Callee)))
235 | return CFP_HostDevice; |
236 | |
  CUDAFunctionTarget CallerTarget = IdentifyCUDATarget(Caller);
  CUDAFunctionTarget CalleeTarget = IdentifyCUDATarget(Callee);
239 | |
240 | // If one of the targets is invalid, the check always fails, no matter what |
241 | // the other target is. |
242 | if (CallerTarget == CFT_InvalidTarget || CalleeTarget == CFT_InvalidTarget) |
243 | return CFP_Never; |
244 | |
245 | // (a) Can't call global from some contexts until we support CUDA's |
246 | // dynamic parallelism. |
247 | if (CalleeTarget == CFT_Global && |
248 | (CallerTarget == CFT_Global || CallerTarget == CFT_Device)) |
249 | return CFP_Never; |
250 | |
251 | // (b) Calling HostDevice is OK for everyone. |
252 | if (CalleeTarget == CFT_HostDevice) |
253 | return CFP_HostDevice; |
254 | |
255 | // (c) Best case scenarios |
256 | if (CalleeTarget == CallerTarget || |
257 | (CallerTarget == CFT_Host && CalleeTarget == CFT_Global) || |
258 | (CallerTarget == CFT_Global && CalleeTarget == CFT_Device)) |
259 | return CFP_Native; |
260 | |
  // HipStdPar mode is special, in that assessing whether a device-side call
  // to a host target is valid is deferred to a subsequent pass and cannot be
  // unambiguously adjudicated in the AST; hence we optimistically allow such
  // calls to pass here.
264 | if (getLangOpts().HIPStdPar && |
265 | (CallerTarget == CFT_Global || CallerTarget == CFT_Device || |
266 | CallerTarget == CFT_HostDevice) && |
267 | CalleeTarget == CFT_Host) |
268 | return CFP_HostDevice; |
269 | |
270 | // (d) HostDevice behavior depends on compilation mode. |
271 | if (CallerTarget == CFT_HostDevice) { |
272 | // It's OK to call a compilation-mode matching function from an HD one. |
273 | if ((getLangOpts().CUDAIsDevice && CalleeTarget == CFT_Device) || |
274 | (!getLangOpts().CUDAIsDevice && |
275 | (CalleeTarget == CFT_Host || CalleeTarget == CFT_Global))) |
276 | return CFP_SameSide; |
277 | |
278 | // Calls from HD to non-mode-matching functions (i.e., to host functions |
279 | // when compiling in device mode or to device functions when compiling in |
280 | // host mode) are allowed at the sema level, but eventually rejected if |
281 | // they're ever codegened. TODO: Reject said calls earlier. |
282 | return CFP_WrongSide; |
283 | } |
284 | |
285 | // (e) Calling across device/host boundary is not something you should do. |
286 | if ((CallerTarget == CFT_Host && CalleeTarget == CFT_Device) || |
287 | (CallerTarget == CFT_Device && CalleeTarget == CFT_Host) || |
288 | (CallerTarget == CFT_Global && CalleeTarget == CFT_Host)) |
289 | return CFP_Never; |
290 | |
291 | llvm_unreachable("All cases should've been handled by now." ); |
292 | } |
293 | |
294 | template <typename AttrT> static bool hasImplicitAttr(const FunctionDecl *D) { |
295 | if (!D) |
296 | return false; |
297 | if (auto *A = D->getAttr<AttrT>()) |
298 | return A->isImplicit(); |
299 | return D->isImplicit(); |
300 | } |
301 | |
302 | bool Sema::isCUDAImplicitHostDeviceFunction(const FunctionDecl *D) { |
303 | bool IsImplicitDevAttr = hasImplicitAttr<CUDADeviceAttr>(D); |
304 | bool IsImplicitHostAttr = hasImplicitAttr<CUDAHostAttr>(D); |
305 | return IsImplicitDevAttr && IsImplicitHostAttr; |
306 | } |
307 | |
308 | void Sema::EraseUnwantedCUDAMatches( |
309 | const FunctionDecl *Caller, |
310 | SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches) { |
311 | if (Matches.size() <= 1) |
312 | return; |
313 | |
314 | using Pair = std::pair<DeclAccessPair, FunctionDecl*>; |
315 | |
316 | // Gets the CUDA function preference for a call from Caller to Match. |
317 | auto GetCFP = [&](const Pair &Match) { |
    return IdentifyCUDAPreference(Caller, Match.second);
319 | }; |
320 | |
321 | // Find the best call preference among the functions in Matches. |
  CUDAFunctionPreference BestCFP = GetCFP(*std::max_element(
      Matches.begin(), Matches.end(),
      [&](const Pair &M1, const Pair &M2) { return GetCFP(M1) < GetCFP(M2); }));
325 | |
326 | // Erase all functions with lower priority. |
  llvm::erase_if(Matches,
                 [&](const Pair &Match) { return GetCFP(Match) < BestCFP; });
329 | } |
330 | |
331 | /// When an implicitly-declared special member has to invoke more than one |
332 | /// base/field special member, conflicts may occur in the targets of these |
/// members. For example, if one base's member is __host__ and another's is
334 | /// __device__, it's a conflict. |
335 | /// This function figures out if the given targets \param Target1 and |
336 | /// \param Target2 conflict, and if they do not it fills in |
337 | /// \param ResolvedTarget with a target that resolves for both calls. |
338 | /// \return true if there's a conflict, false otherwise. |
339 | static bool |
340 | resolveCalleeCUDATargetConflict(Sema::CUDAFunctionTarget Target1, |
341 | Sema::CUDAFunctionTarget Target2, |
342 | Sema::CUDAFunctionTarget *ResolvedTarget) { |
343 | // Only free functions and static member functions may be global. |
344 | assert(Target1 != Sema::CFT_Global); |
345 | assert(Target2 != Sema::CFT_Global); |
346 | |
347 | if (Target1 == Sema::CFT_HostDevice) { |
348 | *ResolvedTarget = Target2; |
349 | } else if (Target2 == Sema::CFT_HostDevice) { |
350 | *ResolvedTarget = Target1; |
351 | } else if (Target1 != Target2) { |
352 | return true; |
353 | } else { |
354 | *ResolvedTarget = Target1; |
355 | } |
356 | |
357 | return false; |
358 | } |
359 | |
360 | bool Sema::inferCUDATargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl, |
361 | CXXSpecialMember CSM, |
362 | CXXMethodDecl *MemberDecl, |
363 | bool ConstRHS, |
364 | bool Diagnose) { |
365 | // If the defaulted special member is defined lexically outside of its |
366 | // owning class, or the special member already has explicit device or host |
367 | // attributes, do not infer. |
368 | bool InClass = MemberDecl->getLexicalParent() == MemberDecl->getParent(); |
369 | bool HasH = MemberDecl->hasAttr<CUDAHostAttr>(); |
370 | bool HasD = MemberDecl->hasAttr<CUDADeviceAttr>(); |
371 | bool HasExplicitAttr = |
372 | (HasD && !MemberDecl->getAttr<CUDADeviceAttr>()->isImplicit()) || |
373 | (HasH && !MemberDecl->getAttr<CUDAHostAttr>()->isImplicit()); |
374 | if (!InClass || HasExplicitAttr) |
375 | return false; |
376 | |
377 | std::optional<CUDAFunctionTarget> InferredTarget; |
378 | |
379 | // We're going to invoke special member lookup; mark that these special |
380 | // members are called from this one, and not from its caller. |
381 | ContextRAII MethodContext(*this, MemberDecl); |
382 | |
383 | // Look for special members in base classes that should be invoked from here. |
  // Infer the target of this member based on the ones it should call.
385 | // Skip direct and indirect virtual bases for abstract classes. |
386 | llvm::SmallVector<const CXXBaseSpecifier *, 16> Bases; |
387 | for (const auto &B : ClassDecl->bases()) { |
388 | if (!B.isVirtual()) { |
      Bases.push_back(&B);
390 | } |
391 | } |
392 | |
393 | if (!ClassDecl->isAbstract()) { |
    llvm::append_range(Bases, llvm::make_pointer_range(ClassDecl->vbases()));
395 | } |
396 | |
397 | for (const auto *B : Bases) { |
398 | const RecordType *BaseType = B->getType()->getAs<RecordType>(); |
399 | if (!BaseType) { |
400 | continue; |
401 | } |
402 | |
    CXXRecordDecl *BaseClassDecl = cast<CXXRecordDecl>(BaseType->getDecl());
404 | Sema::SpecialMemberOverloadResult SMOR = |
        LookupSpecialMember(BaseClassDecl, CSM,
406 | /* ConstArg */ ConstRHS, |
407 | /* VolatileArg */ false, |
408 | /* RValueThis */ false, |
409 | /* ConstThis */ false, |
410 | /* VolatileThis */ false); |
411 | |
412 | if (!SMOR.getMethod()) |
413 | continue; |
414 | |
415 | CUDAFunctionTarget BaseMethodTarget = IdentifyCUDATarget(SMOR.getMethod()); |
416 | if (!InferredTarget) { |
417 | InferredTarget = BaseMethodTarget; |
418 | } else { |
419 | bool ResolutionError = resolveCalleeCUDATargetConflict( |
          *InferredTarget, BaseMethodTarget, &*InferredTarget);
421 | if (ResolutionError) { |
422 | if (Diagnose) { |
423 | Diag(ClassDecl->getLocation(), |
424 | diag::note_implicit_member_target_infer_collision) |
425 | << (unsigned)CSM << *InferredTarget << BaseMethodTarget; |
426 | } |
427 | MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context)); |
428 | return true; |
429 | } |
430 | } |
431 | } |
432 | |
433 | // Same as for bases, but now for special members of fields. |
434 | for (const auto *F : ClassDecl->fields()) { |
435 | if (F->isInvalidDecl()) { |
436 | continue; |
437 | } |
438 | |
439 | const RecordType *FieldType = |
440 | Context.getBaseElementType(F->getType())->getAs<RecordType>(); |
441 | if (!FieldType) { |
442 | continue; |
443 | } |
444 | |
445 | CXXRecordDecl *FieldRecDecl = cast<CXXRecordDecl>(FieldType->getDecl()); |
446 | Sema::SpecialMemberOverloadResult SMOR = |
447 | LookupSpecialMember(FieldRecDecl, CSM, |
448 | /* ConstArg */ ConstRHS && !F->isMutable(), |
449 | /* VolatileArg */ false, |
450 | /* RValueThis */ false, |
451 | /* ConstThis */ false, |
452 | /* VolatileThis */ false); |
453 | |
454 | if (!SMOR.getMethod()) |
455 | continue; |
456 | |
457 | CUDAFunctionTarget FieldMethodTarget = |
458 | IdentifyCUDATarget(SMOR.getMethod()); |
459 | if (!InferredTarget) { |
460 | InferredTarget = FieldMethodTarget; |
461 | } else { |
462 | bool ResolutionError = resolveCalleeCUDATargetConflict( |
463 | *InferredTarget, FieldMethodTarget, &*InferredTarget); |
464 | if (ResolutionError) { |
465 | if (Diagnose) { |
466 | Diag(ClassDecl->getLocation(), |
467 | diag::note_implicit_member_target_infer_collision) |
468 | << (unsigned)CSM << *InferredTarget << FieldMethodTarget; |
469 | } |
470 | MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context)); |
471 | return true; |
472 | } |
473 | } |
474 | } |
475 | |
477 | // If no target was inferred, mark this member as __host__ __device__; |
478 | // it's the least restrictive option that can be invoked from any target. |
479 | bool NeedsH = true, NeedsD = true; |
480 | if (InferredTarget) { |
481 | if (*InferredTarget == CFT_Device) |
482 | NeedsH = false; |
483 | else if (*InferredTarget == CFT_Host) |
484 | NeedsD = false; |
485 | } |
486 | |
  // Either we are setting the attributes for the first time, or the inferred
  // ones must match the previously set ones.
489 | if (NeedsD && !HasD) |
490 | MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context)); |
491 | if (NeedsH && !HasH) |
492 | MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context)); |
493 | |
494 | return false; |
495 | } |
496 | |
497 | bool Sema::isEmptyCudaConstructor(SourceLocation Loc, CXXConstructorDecl *CD) { |
498 | if (!CD->isDefined() && CD->isTemplateInstantiation()) |
    InstantiateFunctionDefinition(Loc, CD->getFirstDecl());
500 | |
501 | // (E.2.3.1, CUDA 7.5) A constructor for a class type is considered |
502 | // empty at a point in the translation unit, if it is either a |
503 | // trivial constructor |
504 | if (CD->isTrivial()) |
505 | return true; |
506 | |
507 | // ... or it satisfies all of the following conditions: |
508 | // The constructor function has been defined. |
509 | // The constructor function has no parameters, |
510 | // and the function body is an empty compound statement. |
511 | if (!(CD->hasTrivialBody() && CD->getNumParams() == 0)) |
512 | return false; |
513 | |
514 | // Its class has no virtual functions and no virtual base classes. |
515 | if (CD->getParent()->isDynamicClass()) |
516 | return false; |
517 | |
518 | // Union ctor does not call ctors of its data members. |
519 | if (CD->getParent()->isUnion()) |
520 | return true; |
521 | |
522 | // The only form of initializer allowed is an empty constructor. |
523 | // This will recursively check all base classes and member initializers |
  if (!llvm::all_of(CD->inits(), [&](const CXXCtorInitializer *CI) {
        if (const CXXConstructExpr *CE =
                dyn_cast<CXXConstructExpr>(CI->getInit()))
          return isEmptyCudaConstructor(Loc, CE->getConstructor());
528 | return false; |
529 | })) |
530 | return false; |
531 | |
532 | return true; |
533 | } |
534 | |
535 | bool Sema::isEmptyCudaDestructor(SourceLocation Loc, CXXDestructorDecl *DD) { |
536 | // No destructor -> no problem. |
537 | if (!DD) |
538 | return true; |
539 | |
540 | if (!DD->isDefined() && DD->isTemplateInstantiation()) |
    InstantiateFunctionDefinition(Loc, DD->getFirstDecl());
542 | |
543 | // (E.2.3.1, CUDA 7.5) A destructor for a class type is considered |
544 | // empty at a point in the translation unit, if it is either a |
  // trivial destructor
546 | if (DD->isTrivial()) |
547 | return true; |
548 | |
549 | // ... or it satisfies all of the following conditions: |
550 | // The destructor function has been defined. |
551 | // and the function body is an empty compound statement. |
552 | if (!DD->hasTrivialBody()) |
553 | return false; |
554 | |
555 | const CXXRecordDecl *ClassDecl = DD->getParent(); |
556 | |
557 | // Its class has no virtual functions and no virtual base classes. |
558 | if (ClassDecl->isDynamicClass()) |
559 | return false; |
560 | |
561 | // Union does not have base class and union dtor does not call dtors of its |
562 | // data members. |
563 | if (DD->getParent()->isUnion()) |
564 | return true; |
565 | |
566 | // Only empty destructors are allowed. This will recursively check |
567 | // destructors for all base classes... |
  if (!llvm::all_of(ClassDecl->bases(), [&](const CXXBaseSpecifier &BS) {
        if (CXXRecordDecl *RD = BS.getType()->getAsCXXRecordDecl())
          return isEmptyCudaDestructor(Loc, RD->getDestructor());
571 | return true; |
572 | })) |
573 | return false; |
574 | |
575 | // ... and member fields. |
576 | if (!llvm::all_of(ClassDecl->fields(), [&](const FieldDecl *Field) { |
577 | if (CXXRecordDecl *RD = Field->getType() |
578 | ->getBaseElementTypeUnsafe() |
579 | ->getAsCXXRecordDecl()) |
580 | return isEmptyCudaDestructor(Loc, RD->getDestructor()); |
581 | return true; |
582 | })) |
583 | return false; |
584 | |
585 | return true; |
586 | } |
587 | |
588 | namespace { |
589 | enum CUDAInitializerCheckKind { |
590 | CICK_DeviceOrConstant, // Check initializer for device/constant variable |
591 | CICK_Shared, // Check initializer for shared variable |
592 | }; |
593 | |
594 | bool IsDependentVar(VarDecl *VD) { |
595 | if (VD->getType()->isDependentType()) |
596 | return true; |
597 | if (const auto *Init = VD->getInit()) |
598 | return Init->isValueDependent(); |
599 | return false; |
600 | } |
601 | |
602 | // Check whether a variable has an allowed initializer for a CUDA device side |
603 | // variable with global storage. \p VD may be a host variable to be checked for |
604 | // potential promotion to device side variable. |
605 | // |
606 | // CUDA/HIP allows only empty constructors as initializers for global |
607 | // variables (see E.2.3.1, CUDA 7.5). The same restriction also applies to all |
608 | // __shared__ variables whether they are local or not (they all are implicitly |
609 | // static in CUDA). One exception is that CUDA allows constant initializers |
610 | // for __constant__ and __device__ variables. |
611 | bool HasAllowedCUDADeviceStaticInitializer(Sema &S, VarDecl *VD, |
612 | CUDAInitializerCheckKind CheckKind) { |
613 | assert(!VD->isInvalidDecl() && VD->hasGlobalStorage()); |
  assert(!IsDependentVar(VD) && "do not check dependent var");
615 | const Expr *Init = VD->getInit(); |
616 | auto IsEmptyInit = [&](const Expr *Init) { |
617 | if (!Init) |
618 | return true; |
    if (const auto *CE = dyn_cast<CXXConstructExpr>(Init)) {
620 | return S.isEmptyCudaConstructor(VD->getLocation(), CE->getConstructor()); |
621 | } |
622 | return false; |
623 | }; |
624 | auto IsConstantInit = [&](const Expr *Init) { |
625 | assert(Init); |
626 | ASTContext::CUDAConstantEvalContextRAII EvalCtx(S.Context, |
                                                    /*NoWrongSidedVars=*/true);
628 | return Init->isConstantInitializer(S.Context, |
629 | VD->getType()->isReferenceType()); |
630 | }; |
631 | auto HasEmptyDtor = [&](VarDecl *VD) { |
632 | if (const auto *RD = VD->getType()->getAsCXXRecordDecl()) |
633 | return S.isEmptyCudaDestructor(VD->getLocation(), RD->getDestructor()); |
634 | return true; |
635 | }; |
636 | if (CheckKind == CICK_Shared) |
637 | return IsEmptyInit(Init) && HasEmptyDtor(VD); |
638 | return S.LangOpts.GPUAllowDeviceInit || |
639 | ((IsEmptyInit(Init) || IsConstantInit(Init)) && HasEmptyDtor(VD)); |
640 | } |
641 | } // namespace |
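
// Illustrative examples of the initializer rules enforced below (a sketch):
//
//   __device__ int a;          // OK: no initializer
//   __device__ int b = 42;     // OK: constant initializer
//   __device__ int gen();      // hypothetical non-constexpr function
//   __device__ int d = gen();  // error: dynamic initialization (unless
//                              // -fgpu-allow-device-init is in effect)
//   __global__ void k() { __shared__ int s = 1; }  // error: __shared__
//                                                  // variables may not be
//                                                  // initialized
//   int h = gen();             // error: host variable initialized by a
//                              // __device__ function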
642 | |
643 | void Sema::checkAllowedCUDAInitializer(VarDecl *VD) { |
644 | // Return early if VD is inside a non-instantiated template function since |
645 | // the implicit constructor is not defined yet. |
646 | if (const FunctionDecl *FD = |
647 | dyn_cast_or_null<FunctionDecl>(VD->getDeclContext())) |
648 | if (FD->isDependentContext()) |
649 | return; |
650 | |
651 | // Do not check dependent variables since the ctor/dtor/initializer are not |
652 | // determined. Do it after instantiation. |
653 | if (VD->isInvalidDecl() || !VD->hasInit() || !VD->hasGlobalStorage() || |
654 | IsDependentVar(VD)) |
655 | return; |
656 | const Expr *Init = VD->getInit(); |
657 | bool IsSharedVar = VD->hasAttr<CUDASharedAttr>(); |
658 | bool IsDeviceOrConstantVar = |
659 | !IsSharedVar && |
660 | (VD->hasAttr<CUDADeviceAttr>() || VD->hasAttr<CUDAConstantAttr>()); |
661 | if (IsDeviceOrConstantVar || IsSharedVar) { |
662 | if (HasAllowedCUDADeviceStaticInitializer( |
            *this, VD, IsSharedVar ? CICK_Shared : CICK_DeviceOrConstant))
664 | return; |
665 | Diag(VD->getLocation(), |
666 | IsSharedVar ? diag::err_shared_var_init : diag::err_dynamic_var_init) |
667 | << Init->getSourceRange(); |
668 | VD->setInvalidDecl(); |
669 | } else { |
670 | // This is a host-side global variable. Check that the initializer is |
671 | // callable from the host side. |
672 | const FunctionDecl *InitFn = nullptr; |
    if (const CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(Init)) {
674 | InitFn = CE->getConstructor(); |
    } else if (const CallExpr *CE = dyn_cast<CallExpr>(Init)) {
676 | InitFn = CE->getDirectCallee(); |
677 | } |
678 | if (InitFn) { |
      CUDAFunctionTarget InitFnTarget = IdentifyCUDATarget(InitFn);
680 | if (InitFnTarget != CFT_Host && InitFnTarget != CFT_HostDevice) { |
681 | Diag(VD->getLocation(), diag::err_ref_bad_target_global_initializer) |
682 | << InitFnTarget << InitFn; |
683 | Diag(InitFn->getLocation(), diag::note_previous_decl) << InitFn; |
684 | VD->setInvalidDecl(); |
685 | } |
686 | } |
687 | } |
688 | } |
689 | |
690 | void Sema::CUDARecordImplicitHostDeviceFuncUsedByDevice( |
691 | const FunctionDecl *Callee) { |
692 | FunctionDecl *Caller = getCurFunctionDecl(/*AllowLambda=*/true); |
693 | if (!Caller) |
694 | return; |
695 | |
  if (!isCUDAImplicitHostDeviceFunction(Callee))
697 | return; |
698 | |
  CUDAFunctionTarget CallerTarget = IdentifyCUDATarget(Caller);
700 | |
701 | // Record whether an implicit host device function is used on device side. |
702 | if (CallerTarget != CFT_Device && CallerTarget != CFT_Global && |
703 | (CallerTarget != CFT_HostDevice || |
       (isCUDAImplicitHostDeviceFunction(Caller) &&
        !getASTContext().CUDAImplicitHostDeviceFunUsedByDevice.count(Caller))))
706 | return; |
707 | |
  getASTContext().CUDAImplicitHostDeviceFunUsedByDevice.insert(Callee);
709 | } |
710 | |
711 | // With -fcuda-host-device-constexpr, an unattributed constexpr function is |
712 | // treated as implicitly __host__ __device__, unless: |
713 | // * it is a variadic function (device-side variadic functions are not |
714 | // allowed), or |
// * a __device__ function with this signature was already declared, in which
//   case we output an error, unless the __device__ decl is in a system header,
//   in which case we leave the constexpr function unattributed.
718 | // |
719 | // In addition, all function decls are treated as __host__ __device__ when |
720 | // ForceCUDAHostDeviceDepth > 0 (corresponding to code within a |
721 | // #pragma clang force_cuda_host_device_begin/end |
722 | // pair). |
723 | void Sema::maybeAddCUDAHostDeviceAttrs(FunctionDecl *NewD, |
724 | const LookupResult &Previous) { |
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
726 | |
727 | if (ForceCUDAHostDeviceDepth > 0) { |
728 | if (!NewD->hasAttr<CUDAHostAttr>()) |
729 | NewD->addAttr(CUDAHostAttr::CreateImplicit(Context)); |
730 | if (!NewD->hasAttr<CUDADeviceAttr>()) |
731 | NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context)); |
732 | return; |
733 | } |
734 | |
  // If a template function has no host/device/global attributes,
  // make it an implicit host-device function.
737 | if (getLangOpts().OffloadImplicitHostDeviceTemplates && |
738 | !NewD->hasAttr<CUDAHostAttr>() && !NewD->hasAttr<CUDADeviceAttr>() && |
739 | !NewD->hasAttr<CUDAGlobalAttr>() && |
740 | (NewD->getDescribedFunctionTemplate() || |
741 | NewD->isFunctionTemplateSpecialization())) { |
742 | NewD->addAttr(CUDAHostAttr::CreateImplicit(Context)); |
743 | NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context)); |
744 | return; |
745 | } |
746 | |
747 | if (!getLangOpts().CUDAHostDeviceConstexpr || !NewD->isConstexpr() || |
748 | NewD->isVariadic() || NewD->hasAttr<CUDAHostAttr>() || |
749 | NewD->hasAttr<CUDADeviceAttr>() || NewD->hasAttr<CUDAGlobalAttr>()) |
750 | return; |
751 | |
752 | // Is D a __device__ function with the same signature as NewD, ignoring CUDA |
753 | // attributes? |
754 | auto IsMatchingDeviceFn = [&](NamedDecl *D) { |
    if (UsingShadowDecl *Using = dyn_cast<UsingShadowDecl>(D))
756 | D = Using->getTargetDecl(); |
757 | FunctionDecl *OldD = D->getAsFunction(); |
758 | return OldD && OldD->hasAttr<CUDADeviceAttr>() && |
759 | !OldD->hasAttr<CUDAHostAttr>() && |
760 | !IsOverload(NewD, OldD, /* UseMemberUsingDeclRules = */ false, |
761 | /* ConsiderCudaAttrs = */ false); |
762 | }; |
  auto It = llvm::find_if(Previous, IsMatchingDeviceFn);
764 | if (It != Previous.end()) { |
765 | // We found a __device__ function with the same name and signature as NewD |
766 | // (ignoring CUDA attrs). This is an error unless that function is defined |
767 | // in a system header, in which case we simply return without making NewD |
768 | // host+device. |
769 | NamedDecl *Match = *It; |
    if (!getSourceManager().isInSystemHeader(Match->getLocation())) {
771 | Diag(NewD->getLocation(), |
772 | diag::err_cuda_unattributed_constexpr_cannot_overload_device) |
773 | << NewD; |
774 | Diag(Match->getLocation(), |
775 | diag::note_cuda_conflicting_device_function_declared_here); |
776 | } |
777 | return; |
778 | } |
779 | |
780 | NewD->addAttr(CUDAHostAttr::CreateImplicit(Context)); |
781 | NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context)); |
782 | } |
783 | |
784 | // TODO: `__constant__` memory may be a limited resource for certain targets. |
785 | // A safeguard may be needed at the end of compilation pipeline if |
786 | // `__constant__` memory usage goes beyond limit. |
787 | void Sema::MaybeAddCUDAConstantAttr(VarDecl *VD) { |
  // Do not promote dependent variables since the ctor/dtor/initializer are
789 | // not determined. Do it after instantiation. |
790 | if (getLangOpts().CUDAIsDevice && !VD->hasAttr<CUDAConstantAttr>() && |
791 | !VD->hasAttr<CUDASharedAttr>() && |
792 | (VD->isFileVarDecl() || VD->isStaticDataMember()) && |
793 | !IsDependentVar(VD) && |
794 | ((VD->isConstexpr() || VD->getType().isConstQualified()) && |
795 | HasAllowedCUDADeviceStaticInitializer(*this, VD, |
796 | CICK_DeviceOrConstant))) { |
797 | VD->addAttr(CUDAConstantAttr::CreateImplicit(getASTContext())); |
798 | } |
799 | } |
800 | |
801 | Sema::SemaDiagnosticBuilder Sema::CUDADiagIfDeviceCode(SourceLocation Loc, |
802 | unsigned DiagID) { |
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
804 | FunctionDecl *CurFunContext = getCurFunctionDecl(/*AllowLambda=*/true); |
805 | SemaDiagnosticBuilder::Kind DiagKind = [&] { |
806 | if (!CurFunContext) |
807 | return SemaDiagnosticBuilder::K_Nop; |
808 | switch (CurrentCUDATarget()) { |
809 | case CFT_Global: |
810 | case CFT_Device: |
811 | return SemaDiagnosticBuilder::K_Immediate; |
812 | case CFT_HostDevice: |
813 | // An HD function counts as host code if we're compiling for host, and |
814 | // device code if we're compiling for device. Defer any errors in device |
815 | // mode until the function is known-emitted. |
816 | if (!getLangOpts().CUDAIsDevice) |
817 | return SemaDiagnosticBuilder::K_Nop; |
818 | if (IsLastErrorImmediate && Diags.getDiagnosticIDs()->isBuiltinNote(DiagID)) |
819 | return SemaDiagnosticBuilder::K_Immediate; |
      return (getEmissionStatus(CurFunContext) ==
821 | FunctionEmissionStatus::Emitted) |
822 | ? SemaDiagnosticBuilder::K_ImmediateWithCallStack |
823 | : SemaDiagnosticBuilder::K_Deferred; |
824 | default: |
825 | return SemaDiagnosticBuilder::K_Nop; |
826 | } |
827 | }(); |
828 | return SemaDiagnosticBuilder(DiagKind, Loc, DiagID, CurFunContext, *this); |
829 | } |
830 | |
831 | Sema::SemaDiagnosticBuilder Sema::CUDADiagIfHostCode(SourceLocation Loc, |
832 | unsigned DiagID) { |
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
834 | FunctionDecl *CurFunContext = getCurFunctionDecl(/*AllowLambda=*/true); |
835 | SemaDiagnosticBuilder::Kind DiagKind = [&] { |
836 | if (!CurFunContext) |
837 | return SemaDiagnosticBuilder::K_Nop; |
838 | switch (CurrentCUDATarget()) { |
839 | case CFT_Host: |
840 | return SemaDiagnosticBuilder::K_Immediate; |
841 | case CFT_HostDevice: |
842 | // An HD function counts as host code if we're compiling for host, and |
843 | // device code if we're compiling for device. Defer any errors in device |
844 | // mode until the function is known-emitted. |
845 | if (getLangOpts().CUDAIsDevice) |
846 | return SemaDiagnosticBuilder::K_Nop; |
847 | if (IsLastErrorImmediate && Diags.getDiagnosticIDs()->isBuiltinNote(DiagID)) |
848 | return SemaDiagnosticBuilder::K_Immediate; |
      return (getEmissionStatus(CurFunContext) ==
850 | FunctionEmissionStatus::Emitted) |
851 | ? SemaDiagnosticBuilder::K_ImmediateWithCallStack |
852 | : SemaDiagnosticBuilder::K_Deferred; |
853 | default: |
854 | return SemaDiagnosticBuilder::K_Nop; |
855 | } |
856 | }(); |
857 | return SemaDiagnosticBuilder(DiagKind, Loc, DiagID, CurFunContext, *this); |
858 | } |
859 | |
860 | bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) { |
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  assert(Callee && "Callee may not be null.");
863 | |
864 | const auto &ExprEvalCtx = currentEvaluationContext(); |
865 | if (ExprEvalCtx.isUnevaluated() || ExprEvalCtx.isConstantEvaluated()) |
866 | return true; |
867 | |
868 | // FIXME: Is bailing out early correct here? Should we instead assume that |
869 | // the caller is a global initializer? |
870 | FunctionDecl *Caller = getCurFunctionDecl(/*AllowLambda=*/true); |
871 | if (!Caller) |
872 | return true; |
873 | |
874 | // If the caller is known-emitted, mark the callee as known-emitted. |
875 | // Otherwise, mark the call in our call graph so we can traverse it later. |
876 | bool CallerKnownEmitted = |
      getEmissionStatus(Caller) == FunctionEmissionStatus::Emitted;
878 | SemaDiagnosticBuilder::Kind DiagKind = [this, Caller, Callee, |
879 | CallerKnownEmitted] { |
880 | switch (IdentifyCUDAPreference(Caller, Callee)) { |
881 | case CFP_Never: |
882 | case CFP_WrongSide: |
883 | assert(Caller && "Never/wrongSide calls require a non-null caller" ); |
884 | // If we know the caller will be emitted, we know this wrong-side call |
885 | // will be emitted, so it's an immediate error. Otherwise, defer the |
886 | // error until we know the caller is emitted. |
887 | return CallerKnownEmitted |
888 | ? SemaDiagnosticBuilder::K_ImmediateWithCallStack |
889 | : SemaDiagnosticBuilder::K_Deferred; |
890 | default: |
891 | return SemaDiagnosticBuilder::K_Nop; |
892 | } |
893 | }(); |
894 | |
895 | if (DiagKind == SemaDiagnosticBuilder::K_Nop) { |
896 | // For -fgpu-rdc, keep track of external kernels used by host functions. |
897 | if (LangOpts.CUDAIsDevice && LangOpts.GPURelocatableDeviceCode && |
898 | Callee->hasAttr<CUDAGlobalAttr>() && !Callee->isDefined()) |
899 | getASTContext().CUDAExternalDeviceDeclODRUsedByHost.insert(Callee); |
900 | return true; |
901 | } |
902 | |
903 | // Avoid emitting this error twice for the same location. Using a hashtable |
904 | // like this is unfortunate, but because we must continue parsing as normal |
905 | // after encountering a deferred error, it's otherwise very tricky for us to |
906 | // ensure that we only emit this deferred error once. |
  if (!LocsWithCUDACallDiags.insert({Caller, Loc}).second)
908 | return true; |
909 | |
910 | SemaDiagnosticBuilder(DiagKind, Loc, diag::err_ref_bad_target, Caller, *this) |
911 | << IdentifyCUDATarget(Callee) << /*function*/ 0 << Callee |
912 | << IdentifyCUDATarget(Caller); |
913 | if (!Callee->getBuiltinID()) |
914 | SemaDiagnosticBuilder(DiagKind, Callee->getLocation(), |
915 | diag::note_previous_decl, Caller, *this) |
916 | << Callee; |
917 | return DiagKind != SemaDiagnosticBuilder::K_Immediate && |
918 | DiagKind != SemaDiagnosticBuilder::K_ImmediateWithCallStack; |
919 | } |
920 | |
921 | // Check the wrong-sided reference capture of lambda for CUDA/HIP. |
922 | // A lambda function may capture a stack variable by reference when it is |
923 | // defined and uses the capture by reference when the lambda is called. When |
924 | // the capture and use happen on different sides, the capture is invalid and |
925 | // should be diagnosed. |
926 | void Sema::CUDACheckLambdaCapture(CXXMethodDecl *Callee, |
927 | const sema::Capture &Capture) { |
  // In host compilation we only need to check lambda functions emitted on the
  // host side. In such lambda functions, a reference capture is invalid only
  // if the lambda structure is populated by a device function or kernel and
  // then passed to and called by a host function. However, that is impossible:
  // a device function or kernel can only call a device function, and a kernel
  // cannot pass a lambda back to a host function, since we cannot define a
  // kernel argument type that could hold the lambda before the lambda itself
  // is defined.
936 | if (!LangOpts.CUDAIsDevice) |
937 | return; |
938 | |
939 | // File-scope lambda can only do init captures for global variables, which |
940 | // results in passing by value for these global variables. |
941 | FunctionDecl *Caller = getCurFunctionDecl(/*AllowLambda=*/true); |
942 | if (!Caller) |
943 | return; |
944 | |
945 | // In device compilation, we only need to check lambda functions which are |
946 | // emitted on device side. For such lambdas, a reference capture is invalid |
947 | // only if the lambda structure is populated by a host function then passed |
948 | // to and called in a device function or kernel. |
949 | bool CalleeIsDevice = Callee->hasAttr<CUDADeviceAttr>(); |
950 | bool CallerIsHost = |
951 | !Caller->hasAttr<CUDAGlobalAttr>() && !Caller->hasAttr<CUDADeviceAttr>(); |
952 | bool ShouldCheck = CalleeIsDevice && CallerIsHost; |
953 | if (!ShouldCheck || !Capture.isReferenceCapture()) |
954 | return; |
955 | auto DiagKind = SemaDiagnosticBuilder::K_Deferred; |
956 | if (Capture.isVariableCapture() && !getLangOpts().HIPStdPar) { |
957 | SemaDiagnosticBuilder(DiagKind, Capture.getLocation(), |
958 | diag::err_capture_bad_target, Callee, *this) |
959 | << Capture.getVariable(); |
960 | } else if (Capture.isThisCapture()) { |
    // Capture of the this pointer is allowed, since it may point to managed
    // memory which is accessible on both the device and host sides. It only
    // results in an invalid memory access if the this pointer points to
    // memory not accessible on the device side.
965 | SemaDiagnosticBuilder(DiagKind, Capture.getLocation(), |
966 | diag::warn_maybe_capture_bad_target_this_ptr, Callee, |
967 | *this); |
968 | } |
969 | } |
970 | |
971 | void Sema::CUDASetLambdaAttrs(CXXMethodDecl *Method) { |
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
973 | if (Method->hasAttr<CUDAHostAttr>() || Method->hasAttr<CUDADeviceAttr>()) |
974 | return; |
975 | Method->addAttr(CUDADeviceAttr::CreateImplicit(Context)); |
976 | Method->addAttr(CUDAHostAttr::CreateImplicit(Context)); |
977 | } |
978 | |
979 | void Sema::checkCUDATargetOverload(FunctionDecl *NewFD, |
980 | const LookupResult &Previous) { |
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  CUDAFunctionTarget NewTarget = IdentifyCUDATarget(NewFD);
983 | for (NamedDecl *OldND : Previous) { |
984 | FunctionDecl *OldFD = OldND->getAsFunction(); |
985 | if (!OldFD) |
986 | continue; |
987 | |
    CUDAFunctionTarget OldTarget = IdentifyCUDATarget(OldFD);
989 | // Don't allow HD and global functions to overload other functions with the |
990 | // same signature. We allow overloading based on CUDA attributes so that |
991 | // functions can have different implementations on the host and device, but |
992 | // HD/global functions "exist" in some sense on both the host and device, so |
993 | // should have the same implementation on both sides. |
994 | if (NewTarget != OldTarget && |
995 | ((NewTarget == CFT_HostDevice && |
996 | !(LangOpts.OffloadImplicitHostDeviceTemplates && |
            isCUDAImplicitHostDeviceFunction(NewFD) &&
998 | OldTarget == CFT_Device)) || |
999 | (OldTarget == CFT_HostDevice && |
1000 | !(LangOpts.OffloadImplicitHostDeviceTemplates && |
            isCUDAImplicitHostDeviceFunction(OldFD) &&
1002 | NewTarget == CFT_Device)) || |
1003 | (NewTarget == CFT_Global) || (OldTarget == CFT_Global)) && |
        !IsOverload(NewFD, OldFD, /* UseMemberUsingDeclRules = */ false,
1005 | /* ConsiderCudaAttrs = */ false)) { |
1006 | Diag(NewFD->getLocation(), diag::err_cuda_ovl_target) |
1007 | << NewTarget << NewFD->getDeclName() << OldTarget << OldFD; |
1008 | Diag(OldFD->getLocation(), diag::note_previous_declaration); |
1009 | NewFD->setInvalidDecl(); |
1010 | break; |
1011 | } |
1012 | } |
1013 | } |
1014 | |
1015 | template <typename AttrTy> |
1016 | static void copyAttrIfPresent(Sema &S, FunctionDecl *FD, |
1017 | const FunctionDecl &TemplateFD) { |
1018 | if (AttrTy *Attribute = TemplateFD.getAttr<AttrTy>()) { |
1019 | AttrTy *Clone = Attribute->clone(S.Context); |
1020 | Clone->setInherited(true); |
    FD->addAttr(Clone);
1022 | } |
1023 | } |
1024 | |
1025 | void Sema::inheritCUDATargetAttrs(FunctionDecl *FD, |
1026 | const FunctionTemplateDecl &TD) { |
1027 | const FunctionDecl &TemplateFD = *TD.getTemplatedDecl(); |
1028 | copyAttrIfPresent<CUDAGlobalAttr>(*this, FD, TemplateFD); |
1029 | copyAttrIfPresent<CUDAHostAttr>(*this, FD, TemplateFD); |
1030 | copyAttrIfPresent<CUDADeviceAttr>(*this, FD, TemplateFD); |
1031 | } |
1032 | |
1033 | std::string Sema::getCudaConfigureFuncName() const { |
1034 | if (getLangOpts().HIP) |
1035 | return getLangOpts().HIPUseNewLaunchAPI ? "__hipPushCallConfiguration" |
1036 | : "hipConfigureCall" ; |
1037 | |
1038 | // New CUDA kernel launch sequence. |
1039 | if (CudaFeatureEnabled(Context.getTargetInfo().getSDKVersion(), |
1040 | CudaFeature::CUDA_USES_NEW_LAUNCH)) |
1041 | return "__cudaPushCallConfiguration" ; |
1042 | |
1043 | // Legacy CUDA kernel configuration call |
1044 | return "cudaConfigureCall" ; |
1045 | } |
1046 | |