//===--- SemaCUDA.cpp - Semantic Analysis for CUDA constructs -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \file
/// \brief This file implements semantic analysis for CUDA constructs.
///
//===----------------------------------------------------------------------===//

#include "clang/Sema/Sema.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Sema/SemaDiagnostic.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
using namespace clang;

ExprResult Sema::ActOnCUDAExecConfigExpr(Scope *S, SourceLocation LLLLoc,
                                         MultiExprArg ExecConfig,
                                         SourceLocation GGGLoc) {
  FunctionDecl *ConfigDecl = Context.getcudaConfigureCallDecl();
  if (!ConfigDecl)
    return ExprError(Diag(LLLLoc, diag::err_undeclared_var_use)
                     << "cudaConfigureCall");
  QualType ConfigQTy = ConfigDecl->getType();

  DeclRefExpr *ConfigDR = new (Context)
      DeclRefExpr(ConfigDecl, false, ConfigQTy, VK_LValue, LLLLoc);
  MarkFunctionReferenced(LLLLoc, ConfigDecl);

  return ActOnCallExpr(S, ConfigDR, LLLLoc, ExecConfig, GGGLoc, nullptr,
                       /*IsExecConfig=*/true);
}
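
// Illustrative sketch (example names are assumptions, not part of the
// original source): for a kernel launch written as
//   Kernel<<<dim3(Blocks), dim3(Threads), SharedBytes, Stream>>>(Args...);
// the execution-configuration arguments arrive here as ExecConfig and are
// wrapped into a call to the cudaConfigureCall declaration previously
// recorded on the ASTContext, which is what the returned call expression
// represents.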

/// IdentifyCUDATarget - Determine the CUDA compilation target for this function
Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D) {
  if (D->hasAttr<CUDAInvalidTargetAttr>())
    return CFT_InvalidTarget;

  if (D->hasAttr<CUDAGlobalAttr>())
    return CFT_Global;

  if (D->hasAttr<CUDADeviceAttr>()) {
    if (D->hasAttr<CUDAHostAttr>())
      return CFT_HostDevice;
    return CFT_Device;
  } else if (D->hasAttr<CUDAHostAttr>()) {
    return CFT_Host;
  } else if (D->isImplicit()) {
    // Some implicit declarations (like intrinsic functions) are not marked.
    // Set the most lenient target on them for maximal flexibility.
    return CFT_HostDevice;
  }

  return CFT_Host;
}
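
// Illustrative mapping (example declarations assumed for clarity, not part of
// the original source):
//   __global__ void Kern();            // -> CFT_Global
//   __device__ void DevFn();           // -> CFT_Device
//   __host__ __device__ void HDFn();   // -> CFT_HostDevice
//   void PlainFn();                    // -> CFT_Host (no target attribute)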

// * CUDA Call preference table
//
// F - from,
// T - to
// Ph - preference in host mode
// Pd - preference in device mode
// H - handled in (x)
// Preferences: b-best, f-fallback, l-last resort, n-never.
//
// | F  | T  | Ph | Pd |  H  |
// |----+----+----+----+-----+
// | d  | d  | b  | b  | (b) |
// | d  | g  | n  | n  | (a) |
// | d  | h  | l  | l  | (e) |
// | d  | hd | f  | f  | (c) |
// | g  | d  | b  | b  | (b) |
// | g  | g  | n  | n  | (a) |
// | g  | h  | l  | l  | (e) |
// | g  | hd | f  | f  | (c) |
// | h  | d  | l  | l  | (e) |
// | h  | g  | b  | b  | (b) |
// | h  | h  | b  | b  | (b) |
// | h  | hd | f  | f  | (c) |
// | hd | d  | l  | f  | (d) |
// | hd | g  | f  | n  |(d/a)|
// | hd | h  | f  | l  | (d) |
// | hd | hd | b  | b  | (b) |

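// Example reading of the table (illustrative, not part of the original
// source): an "hd" caller invoking a "d" callee,
//   __device__ void DevFn();
//   __host__ __device__ void HDFn() { DevFn(); }
// gets CFP_Fallback when compiling for the device; when compiling for the
// host it is at best CFP_LastResort (and CFP_Never unless
// CUDADisableTargetCallChecks is set), handled in case (d) below.
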
Sema::CUDAFunctionPreference
Sema::IdentifyCUDAPreference(const FunctionDecl *Caller,
                             const FunctionDecl *Callee) {
  assert(getLangOpts().CUDATargetOverloads &&
         "Should not be called w/o enabled target overloads.");

  assert(Callee && "Callee must be valid.");
  CUDAFunctionTarget CalleeTarget = IdentifyCUDATarget(Callee);
  CUDAFunctionTarget CallerTarget =
      (Caller != nullptr) ? IdentifyCUDATarget(Caller) : Sema::CFT_Host;

  // If one of the targets is invalid, the check always fails, no matter what
  // the other target is.
  if (CallerTarget == CFT_InvalidTarget || CalleeTarget == CFT_InvalidTarget)
    return CFP_Never;

  // (a) Can't call global from some contexts until we support CUDA's
  // dynamic parallelism.
  if (CalleeTarget == CFT_Global &&
      (CallerTarget == CFT_Global || CallerTarget == CFT_Device ||
       (CallerTarget == CFT_HostDevice && getLangOpts().CUDAIsDevice)))
    return CFP_Never;

  // (b) Best case scenarios
  if (CalleeTarget == CallerTarget ||
      (CallerTarget == CFT_Host && CalleeTarget == CFT_Global) ||
      (CallerTarget == CFT_Global && CalleeTarget == CFT_Device))
    return CFP_Best;

  // (c) Calling HostDevice is OK as a fallback that works for everyone.
  if (CalleeTarget == CFT_HostDevice)
    return CFP_Fallback;

  // Figure out what should be returned in the 'last resort' cases. Normally
  // those would not be allowed, but we'll consider them if
  // CUDADisableTargetCallChecks is true.
  CUDAFunctionPreference QuestionableResult =
      getLangOpts().CUDADisableTargetCallChecks ? CFP_LastResort : CFP_Never;

  // (d) HostDevice behavior depends on compilation mode.
  if (CallerTarget == CFT_HostDevice) {
    // Calling a function that matches the compilation mode is OK.
    // Calling a function from the other side is frowned upon.
    if (getLangOpts().CUDAIsDevice)
      return CalleeTarget == CFT_Device ? CFP_Fallback : QuestionableResult;
    else
      return (CalleeTarget == CFT_Host || CalleeTarget == CFT_Global)
                 ? CFP_Fallback
                 : QuestionableResult;
  }

  // (e) Calling across the device/host boundary is not something you should do.
  if ((CallerTarget == CFT_Host && CalleeTarget == CFT_Device) ||
      (CallerTarget == CFT_Device && CalleeTarget == CFT_Host) ||
      (CallerTarget == CFT_Global && CalleeTarget == CFT_Host))
    return QuestionableResult;

  llvm_unreachable("All cases should've been handled by now.");
}

bool Sema::CheckCUDATarget(const FunctionDecl *Caller,
                           const FunctionDecl *Callee) {
  // With target overloads enabled, we only disallow calling
  // combinations with CFP_Never.
  if (getLangOpts().CUDATargetOverloads)
    return IdentifyCUDAPreference(Caller, Callee) == CFP_Never;

  // The CUDADisableTargetCallChecks option short-circuits this check: we
  // assume all cross-target calls are valid.
  if (getLangOpts().CUDADisableTargetCallChecks)
    return false;

  CUDAFunctionTarget CallerTarget = IdentifyCUDATarget(Caller),
                     CalleeTarget = IdentifyCUDATarget(Callee);

  // If one of the targets is invalid, the check always fails, no matter what
  // the other target is.
  if (CallerTarget == CFT_InvalidTarget || CalleeTarget == CFT_InvalidTarget)
    return true;

  // CUDA B.1.1 "The __device__ qualifier declares a function that is [...]
  // Callable from the device only."
  if (CallerTarget == CFT_Host && CalleeTarget == CFT_Device)
    return true;

  // CUDA B.1.2 "The __global__ qualifier declares a function that is [...]
  // Callable from the host only."
  // CUDA B.1.3 "The __host__ qualifier declares a function that is [...]
  // Callable from the host only."
  if ((CallerTarget == CFT_Device || CallerTarget == CFT_Global) &&
      (CalleeTarget == CFT_Host || CalleeTarget == CFT_Global))
    return true;

  // CUDA B.1.3 "The __device__ and __host__ qualifiers can be used together
  // however, in which case the function is compiled for both the host and the
  // device. The __CUDA_ARCH__ macro [...] can be used to differentiate code
  // paths between host and device."
  if (CallerTarget == CFT_HostDevice && CalleeTarget != CFT_HostDevice) {
    // If the caller is implicit then the check always passes.
    if (Caller->isImplicit()) return false;

    bool InDeviceMode = getLangOpts().CUDAIsDevice;
    if (!InDeviceMode && CalleeTarget != CFT_Host)
      return true;
    if (InDeviceMode && CalleeTarget != CFT_Device) {
      // Allow host device functions to call host functions if explicitly
      // requested.
      if (CalleeTarget == CFT_Host &&
          getLangOpts().CUDAAllowHostCallsFromHostDevice) {
        Diag(Caller->getLocation(),
             diag::warn_host_calls_from_host_device)
            << Callee->getNameAsString() << Caller->getNameAsString();
        return false;
      }

      return true;
    }
  }

  return false;
}
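
// Illustrative example (assumed, not part of the original source): with the
// default options (no target overloads, call checks enabled), CheckCUDATarget
// rejects the following cross-target call:
//   __device__ int DevFn();
//   void HostFn() { DevFn(); }   // host -> device: disallowed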

template <typename T, typename FetchDeclFn>
static void EraseUnwantedCUDAMatchesImpl(Sema &S, const FunctionDecl *Caller,
                                         llvm::SmallVectorImpl<T> &Matches,
                                         FetchDeclFn FetchDecl) {
  assert(S.getLangOpts().CUDATargetOverloads &&
         "Should not be called w/o enabled target overloads.");
  if (Matches.size() <= 1)
    return;

  // Find the best call preference among the functions in Matches.
  Sema::CUDAFunctionPreference P, BestCFP = Sema::CFP_Never;
  for (auto const &Match : Matches) {
    P = S.IdentifyCUDAPreference(Caller, FetchDecl(Match));
    if (P > BestCFP)
      BestCFP = P;
  }

  // Erase all functions with lower priority.
  for (unsigned I = 0, N = Matches.size(); I != N;)
    if (S.IdentifyCUDAPreference(Caller, FetchDecl(Matches[I])) < BestCFP) {
      Matches[I] = Matches[--N];
      Matches.resize(N);
    } else {
      ++I;
    }
}
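
// Illustrative example (assumed, not part of the original source): if a
// __device__ caller's overload set contains both
//   __host__ void Overloaded();             // at best CFP_LastResort
//   __host__ __device__ void Overloaded();  // CFP_Fallback
// the __host__-only candidate has a strictly lower preference and is erased,
// leaving only the __host__ __device__ overload.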

void Sema::EraseUnwantedCUDAMatches(const FunctionDecl *Caller,
                                    SmallVectorImpl<FunctionDecl *> &Matches) {
  EraseUnwantedCUDAMatchesImpl<FunctionDecl *>(
      *this, Caller, Matches, [](const FunctionDecl *item) { return item; });
}

void Sema::EraseUnwantedCUDAMatches(const FunctionDecl *Caller,
                                    SmallVectorImpl<DeclAccessPair> &Matches) {
  EraseUnwantedCUDAMatchesImpl<DeclAccessPair>(
      *this, Caller, Matches, [](const DeclAccessPair &item) {
        return dyn_cast<FunctionDecl>(item.getDecl());
      });
}

void Sema::EraseUnwantedCUDAMatches(
    const FunctionDecl *Caller,
    SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches) {
  EraseUnwantedCUDAMatchesImpl<std::pair<DeclAccessPair, FunctionDecl *>>(
      *this, Caller, Matches,
      [](const std::pair<DeclAccessPair, FunctionDecl *> &item) {
        return dyn_cast<FunctionDecl>(item.second);
      });
}

/// When an implicitly-declared special member has to invoke more than one
/// base/field special member, conflicts may occur in the targets of these
/// members. For example, if one base's member is __host__ and another's is
/// __device__, it's a conflict.
/// This function figures out if the given targets \param Target1 and
/// \param Target2 conflict, and if they do not it fills in
/// \param ResolvedTarget with a target that resolves for both calls.
/// \return true if there's a conflict, false otherwise.
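///
/// Illustrative example (assumed, not part of the original source): an
/// implicit default constructor for
///   struct A { __device__ A(); };
///   struct B { __host__ B(); };
///   struct C : A, B {};
/// must call both a __device__ and a __host__ constructor, which conflicts;
/// the member is then marked with CUDAInvalidTargetAttr by the caller below.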
static bool
resolveCalleeCUDATargetConflict(Sema::CUDAFunctionTarget Target1,
                                Sema::CUDAFunctionTarget Target2,
                                Sema::CUDAFunctionTarget *ResolvedTarget) {
  // Only free functions and static member functions may be global.
  assert(Target1 != Sema::CFT_Global);
  assert(Target2 != Sema::CFT_Global);

  if (Target1 == Sema::CFT_HostDevice) {
    *ResolvedTarget = Target2;
  } else if (Target2 == Sema::CFT_HostDevice) {
    *ResolvedTarget = Target1;
  } else if (Target1 != Target2) {
    return true;
  } else {
    *ResolvedTarget = Target1;
  }

  return false;
}

bool Sema::inferCUDATargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl,
                                                   CXXSpecialMember CSM,
                                                   CXXMethodDecl *MemberDecl,
                                                   bool ConstRHS,
                                                   bool Diagnose) {
  llvm::Optional<CUDAFunctionTarget> InferredTarget;

  // We're going to invoke special member lookup; mark that these special
  // members are called from this one, and not from its caller.
  ContextRAII MethodContext(*this, MemberDecl);

  // Look for special members in base classes that should be invoked from here.
  // Infer the target of this member based on the ones it should call.
  // Skip direct and indirect virtual bases for abstract classes.
  llvm::SmallVector<const CXXBaseSpecifier *, 16> Bases;
  for (const auto &B : ClassDecl->bases()) {
    if (!B.isVirtual()) {
      Bases.push_back(&B);
    }
  }

  if (!ClassDecl->isAbstract()) {
    for (const auto &VB : ClassDecl->vbases()) {
      Bases.push_back(&VB);
    }
  }

  for (const auto *B : Bases) {
    const RecordType *BaseType = B->getType()->getAs<RecordType>();
    if (!BaseType) {
      continue;
    }

    CXXRecordDecl *BaseClassDecl = cast<CXXRecordDecl>(BaseType->getDecl());
    Sema::SpecialMemberOverloadResult *SMOR =
        LookupSpecialMember(BaseClassDecl, CSM,
                            /* ConstArg */ ConstRHS,
                            /* VolatileArg */ false,
                            /* RValueThis */ false,
                            /* ConstThis */ false,
                            /* VolatileThis */ false);

    if (!SMOR || !SMOR->getMethod()) {
      continue;
    }

    CUDAFunctionTarget BaseMethodTarget = IdentifyCUDATarget(SMOR->getMethod());
    if (!InferredTarget.hasValue()) {
      InferredTarget = BaseMethodTarget;
    } else {
      bool ResolutionError = resolveCalleeCUDATargetConflict(
          InferredTarget.getValue(), BaseMethodTarget,
          InferredTarget.getPointer());
      if (ResolutionError) {
        if (Diagnose) {
          Diag(ClassDecl->getLocation(),
               diag::note_implicit_member_target_infer_collision)
              << (unsigned)CSM << InferredTarget.getValue() << BaseMethodTarget;
        }
        MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context));
        return true;
      }
    }
  }

  // Same as for bases, but now for special members of fields.
  for (const auto *F : ClassDecl->fields()) {
    if (F->isInvalidDecl()) {
      continue;
    }

    const RecordType *FieldType =
        Context.getBaseElementType(F->getType())->getAs<RecordType>();
    if (!FieldType) {
      continue;
    }

    CXXRecordDecl *FieldRecDecl = cast<CXXRecordDecl>(FieldType->getDecl());
    Sema::SpecialMemberOverloadResult *SMOR =
        LookupSpecialMember(FieldRecDecl, CSM,
                            /* ConstArg */ ConstRHS && !F->isMutable(),
                            /* VolatileArg */ false,
                            /* RValueThis */ false,
                            /* ConstThis */ false,
                            /* VolatileThis */ false);

    if (!SMOR || !SMOR->getMethod()) {
      continue;
    }

    CUDAFunctionTarget FieldMethodTarget =
        IdentifyCUDATarget(SMOR->getMethod());
    if (!InferredTarget.hasValue()) {
      InferredTarget = FieldMethodTarget;
    } else {
      bool ResolutionError = resolveCalleeCUDATargetConflict(
          InferredTarget.getValue(), FieldMethodTarget,
          InferredTarget.getPointer());
      if (ResolutionError) {
        if (Diagnose) {
          Diag(ClassDecl->getLocation(),
               diag::note_implicit_member_target_infer_collision)
              << (unsigned)CSM << InferredTarget.getValue()
              << FieldMethodTarget;
        }
        MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context));
        return true;
      }
    }
  }

  if (InferredTarget.hasValue()) {
    if (InferredTarget.getValue() == CFT_Device) {
      MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
    } else if (InferredTarget.getValue() == CFT_Host) {
      MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context));
    } else {
      MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
      MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context));
    }
  } else {
    // If no target was inferred, mark this member as __host__ __device__;
    // it's the least restrictive option that can be invoked from any target.
    MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
    MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context));
  }

  return false;
}
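
// Illustrative example (assumed, not part of the original source): for
//   struct Base { __device__ Base(); };
//   struct Derived : Base { int X; };
// the implicit Derived::Derived() only needs Base's __device__ constructor,
// so it is implicitly marked __device__; had no base or field special member
// been found, it would have been marked __host__ __device__ instead.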