https://github.com/yxsamliu updated https://github.com/llvm/llvm-project/pull/65606:
>From a998f1291e63eff53bed12bb0a369f3b636c4ae0 Mon Sep 17 00:00:00 2001 From: "Yaxun (Sam) Liu" <yaxun....@amd.com> Date: Thu, 31 Aug 2023 18:02:56 -0400 Subject: [PATCH] Reland "[CUDA][HIP] Fix overloading resolution in global var init" https://reviews.llvm.org/D158247 caused regressions for HIP on Windows and was reverted. A reduced test case is: ``` typedef void (__stdcall* funcTy)(); void invoke(funcTy f); static void __stdcall callee() noexcept { } void foo() { invoke(callee); } ``` It is due to clang missing handling host/device attributes for calling convention at a few places This patch fixes that. --- .gitignore | 1 + clang/include/clang/Sema/Sema.h | 46 +++++++++--- clang/lib/Parse/ParseDecl.cpp | 1 + clang/lib/Sema/SemaCUDA.cpp | 24 ++++++- clang/lib/Sema/SemaDeclAttr.cpp | 9 ++- clang/lib/Sema/SemaOverload.cpp | 45 ++++++------ clang/lib/Sema/SemaType.cpp | 60 +++++++++------- clang/test/CodeGenCUDA/global-initializers.cu | 51 +++++++++++++ .../SemaCUDA/amdgpu-windows-vectorcall.cu | 1 + clang/test/SemaCUDA/function-overload.cu | 6 ++ .../test/SemaCUDA/global-initializers-host.cu | 32 --------- clang/test/SemaCUDA/global-initializers.cu | 72 +++++++++++++++++++ clang/test/SemaCUDA/windows-calling-conv.cu | 17 +++++ 13 files changed, 272 insertions(+), 93 deletions(-) create mode 100644 clang/test/CodeGenCUDA/global-initializers.cu delete mode 100644 clang/test/SemaCUDA/global-initializers-host.cu create mode 100644 clang/test/SemaCUDA/global-initializers.cu create mode 100644 clang/test/SemaCUDA/windows-calling-conv.cu diff --git a/.gitignore b/.gitignore index 20c4f52cd37860e..8021a3eb8919765 100644 --- a/.gitignore +++ b/.gitignore @@ -70,3 +70,4 @@ pythonenv* /clang/utils/analyzer/projects/*/RefScanBuildResults # automodapi puts generated documentation files here. /lldb/docs/python_api/ +/Debug/ diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 5fdca93c66ab5cd..1bb096c667e39c3 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -1012,6 +1012,14 @@ class Sema final { } } DelayedDiagnostics; + enum CUDAFunctionTarget { + CFT_Device, + CFT_Global, + CFT_Host, + CFT_HostDevice, + CFT_InvalidTarget + }; + /// A RAII object to temporarily push a declaration context. class ContextRAII { private: @@ -4753,8 +4761,13 @@ class Sema final { bool isValidPointerAttrType(QualType T, bool RefOkay = false); bool CheckRegparmAttr(const ParsedAttr &attr, unsigned &value); + + /// Check validaty of calling convention attribute \p attr. If \p FD + /// is not null pointer, use \p FD to determine the CUDA/HIP host/device + /// target. Otherwise, it is specified by \p CFT. bool CheckCallingConvAttr(const ParsedAttr &attr, CallingConv &CC, - const FunctionDecl *FD = nullptr); + const FunctionDecl *FD = nullptr, + CUDAFunctionTarget CFT = CFT_InvalidTarget); bool CheckAttrTarget(const ParsedAttr &CurrAttr); bool CheckAttrNoArgs(const ParsedAttr &CurrAttr); bool checkStringLiteralArgumentAttr(const AttributeCommonInfo &CI, @@ -13266,14 +13279,6 @@ class Sema final { void checkTypeSupport(QualType Ty, SourceLocation Loc, ValueDecl *D = nullptr); - enum CUDAFunctionTarget { - CFT_Device, - CFT_Global, - CFT_Host, - CFT_HostDevice, - CFT_InvalidTarget - }; - /// Determines whether the given function is a CUDA device/host/kernel/etc. /// function. /// @@ -13292,6 +13297,29 @@ class Sema final { /// Determines whether the given variable is emitted on host or device side. CUDAVariableTarget IdentifyCUDATarget(const VarDecl *D); + /// Defines kinds of CUDA global host/device context where a function may be + /// called. + enum CUDATargetContextKind { + CTCK_Unknown, /// Unknown context + CTCK_InitGlobalVar, /// Function called during global variable + /// initialization + }; + + /// Define the current global CUDA host/device context where a function may be + /// called. Only used when a function is called outside of any functions. + struct CUDATargetContext { + CUDAFunctionTarget Target = CFT_HostDevice; + CUDATargetContextKind Kind = CTCK_Unknown; + Decl *D = nullptr; + } CurCUDATargetCtx; + + struct CUDATargetContextRAII { + Sema &S; + CUDATargetContext SavedCtx; + CUDATargetContextRAII(Sema &S_, CUDATargetContextKind K, Decl *D); + ~CUDATargetContextRAII() { S.CurCUDATargetCtx = SavedCtx; } + }; + /// Gets the CUDA target for the current context. CUDAFunctionTarget CurrentCUDATarget() { return IdentifyCUDATarget(dyn_cast<FunctionDecl>(CurContext)); diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp index 4a9f2caf654713e..7c27a02ee4af625 100644 --- a/clang/lib/Parse/ParseDecl.cpp +++ b/clang/lib/Parse/ParseDecl.cpp @@ -2571,6 +2571,7 @@ Decl *Parser::ParseDeclarationAfterDeclaratorAndAttributes( } } + Sema::CUDATargetContextRAII X(Actions, Sema::CTCK_InitGlobalVar, ThisDecl); switch (TheInitKind) { // Parse declarator '=' initializer. case InitKind::Equal: { diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp index cfea6493ced7d26..88f5484575db17a 100644 --- a/clang/lib/Sema/SemaCUDA.cpp +++ b/clang/lib/Sema/SemaCUDA.cpp @@ -105,19 +105,37 @@ Sema::IdentifyCUDATarget(const ParsedAttributesView &Attrs) { } template <typename A> -static bool hasAttr(const FunctionDecl *D, bool IgnoreImplicitAttr) { +static bool hasAttr(const Decl *D, bool IgnoreImplicitAttr) { return D->hasAttrs() && llvm::any_of(D->getAttrs(), [&](Attr *Attribute) { return isa<A>(Attribute) && !(IgnoreImplicitAttr && Attribute->isImplicit()); }); } +Sema::CUDATargetContextRAII::CUDATargetContextRAII(Sema &S_, + CUDATargetContextKind K, + Decl *D) + : S(S_) { + SavedCtx = S.CurCUDATargetCtx; + assert(K == CTCK_InitGlobalVar); + auto *VD = dyn_cast_or_null<VarDecl>(D); + if (VD && VD->hasGlobalStorage() && !VD->isStaticLocal()) { + auto Target = CFT_Host; + if ((hasAttr<CUDADeviceAttr>(VD, /*IgnoreImplicit=*/true) && + !hasAttr<CUDAHostAttr>(VD, /*IgnoreImplicit=*/true)) || + hasAttr<CUDASharedAttr>(VD, /*IgnoreImplicit=*/true) || + hasAttr<CUDAConstantAttr>(VD, /*IgnoreImplicit=*/true)) + Target = CFT_Device; + S.CurCUDATargetCtx = {Target, K, VD}; + } +} + /// IdentifyCUDATarget - Determine the CUDA compilation target for this function Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D, bool IgnoreImplicitHDAttr) { - // Code that lives outside a function is run on the host. + // Code that lives outside a function gets the target from CurCUDATargetCtx. if (D == nullptr) - return CFT_Host; + return CurCUDATargetCtx.Target; if (D->hasAttr<CUDAInvalidTargetAttr>()) return CFT_InvalidTarget; diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index 59e0f3e83cfdd80..cc98713241395ec 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -5132,7 +5132,8 @@ static void handleCallConvAttr(Sema &S, Decl *D, const ParsedAttr &AL) { // Diagnostic is emitted elsewhere: here we store the (valid) AL // in the Decl node for syntactic reasoning, e.g., pretty-printing. CallingConv CC; - if (S.CheckCallingConvAttr(AL, CC, /*FD*/nullptr)) + if (S.CheckCallingConvAttr(AL, CC, /*FD*/ nullptr, + S.IdentifyCUDATarget(dyn_cast<FunctionDecl>(D)))) return; if (!isa<ObjCMethodDecl>(D)) { @@ -5317,7 +5318,8 @@ static void handleNoRandomizeLayoutAttr(Sema &S, Decl *D, } bool Sema::CheckCallingConvAttr(const ParsedAttr &Attrs, CallingConv &CC, - const FunctionDecl *FD) { + const FunctionDecl *FD, + CUDAFunctionTarget CFT) { if (Attrs.isInvalid()) return true; @@ -5416,7 +5418,8 @@ bool Sema::CheckCallingConvAttr(const ParsedAttr &Attrs, CallingConv &CC, // on their host/device attributes. if (LangOpts.CUDA) { auto *Aux = Context.getAuxTargetInfo(); - auto CudaTarget = IdentifyCUDATarget(FD); + assert(FD || CFT != CFT_InvalidTarget); + auto CudaTarget = FD ? IdentifyCUDATarget(FD) : CFT; bool CheckHost = false, CheckDevice = false; switch (CudaTarget) { case CFT_HostDevice: diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index fb62dee4aa58eae..d69b339306f0060 100644 --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -6697,17 +6697,19 @@ void Sema::AddOverloadCandidate( } // (CUDA B.1): Check for invalid calls between targets. - if (getLangOpts().CUDA) - if (const FunctionDecl *Caller = getCurFunctionDecl(/*AllowLambda=*/true)) - // Skip the check for callers that are implicit members, because in this - // case we may not yet know what the member's target is; the target is - // inferred for the member automatically, based on the bases and fields of - // the class. - if (!Caller->isImplicit() && !IsAllowedCUDACall(Caller, Function)) { - Candidate.Viable = false; - Candidate.FailureKind = ovl_fail_bad_target; - return; - } + if (getLangOpts().CUDA) { + const FunctionDecl *Caller = getCurFunctionDecl(/*AllowLambda=*/true); + // Skip the check for callers that are implicit members, because in this + // case we may not yet know what the member's target is; the target is + // inferred for the member automatically, based on the bases and fields of + // the class. + if (!(Caller && Caller->isImplicit()) && + !IsAllowedCUDACall(Caller, Function)) { + Candidate.Viable = false; + Candidate.FailureKind = ovl_fail_bad_target; + return; + } + } if (Function->getTrailingRequiresClause()) { ConstraintSatisfaction Satisfaction; @@ -7219,12 +7221,11 @@ Sema::AddMethodCandidate(CXXMethodDecl *Method, DeclAccessPair FoundDecl, // (CUDA B.1): Check for invalid calls between targets. if (getLangOpts().CUDA) - if (const FunctionDecl *Caller = getCurFunctionDecl(/*AllowLambda=*/true)) - if (!IsAllowedCUDACall(Caller, Method)) { - Candidate.Viable = false; - Candidate.FailureKind = ovl_fail_bad_target; - return; - } + if (!IsAllowedCUDACall(getCurFunctionDecl(/*AllowLambda=*/true), Method)) { + Candidate.Viable = false; + Candidate.FailureKind = ovl_fail_bad_target; + return; + } if (Method->getTrailingRequiresClause()) { ConstraintSatisfaction Satisfaction; @@ -12495,10 +12496,12 @@ class AddressOfFunctionResolver { return false; if (FunctionDecl *FunDecl = dyn_cast<FunctionDecl>(Fn)) { - if (S.getLangOpts().CUDA) - if (FunctionDecl *Caller = S.getCurFunctionDecl(/*AllowLambda=*/true)) - if (!Caller->isImplicit() && !S.IsAllowedCUDACall(Caller, FunDecl)) - return false; + if (S.getLangOpts().CUDA) { + FunctionDecl *Caller = S.getCurFunctionDecl(/*AllowLambda=*/true); + if (!(Caller && Caller->isImplicit()) && + !S.IsAllowedCUDACall(Caller, FunDecl)) + return false; + } if (FunDecl->isMultiVersion()) { const auto *TA = FunDecl->getAttr<TargetAttr>(); if (TA && !TA->isDefaultVersion()) diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp index 94d170af867193c..ffd29446b4f2edd 100644 --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -366,12 +366,14 @@ enum TypeAttrLocation { TAL_DeclName }; -static void processTypeAttrs(TypeProcessingState &state, QualType &type, - TypeAttrLocation TAL, - const ParsedAttributesView &attrs); +static void +processTypeAttrs(TypeProcessingState &state, QualType &type, + TypeAttrLocation TAL, const ParsedAttributesView &attrs, + Sema::CUDAFunctionTarget CFT = Sema::CFT_HostDevice); static bool handleFunctionTypeAttr(TypeProcessingState &state, ParsedAttr &attr, - QualType &type); + QualType &type, + Sema::CUDAFunctionTarget CFT); static bool handleMSPointerTypeQualifierAttr(TypeProcessingState &state, ParsedAttr &attr, QualType &type); @@ -617,7 +619,8 @@ static void distributeFunctionTypeAttr(TypeProcessingState &state, /// distributed, false if no location was found. static bool distributeFunctionTypeAttrToInnermost( TypeProcessingState &state, ParsedAttr &attr, - ParsedAttributesView &attrList, QualType &declSpecType) { + ParsedAttributesView &attrList, QualType &declSpecType, + Sema::CUDAFunctionTarget CFT) { Declarator &declarator = state.getDeclarator(); // Put it on the innermost function chunk, if there is one. @@ -629,19 +632,20 @@ static bool distributeFunctionTypeAttrToInnermost( return true; } - return handleFunctionTypeAttr(state, attr, declSpecType); + return handleFunctionTypeAttr(state, attr, declSpecType, CFT); } /// A function type attribute was written in the decl spec. Try to /// apply it somewhere. -static void distributeFunctionTypeAttrFromDeclSpec(TypeProcessingState &state, - ParsedAttr &attr, - QualType &declSpecType) { +static void +distributeFunctionTypeAttrFromDeclSpec(TypeProcessingState &state, + ParsedAttr &attr, QualType &declSpecType, + Sema::CUDAFunctionTarget CFT) { state.saveDeclSpecAttrs(); // Try to distribute to the innermost. if (distributeFunctionTypeAttrToInnermost( - state, attr, state.getCurrentAttributes(), declSpecType)) + state, attr, state.getCurrentAttributes(), declSpecType, CFT)) return; // If that failed, diagnose the bad attribute when the declarator is @@ -653,14 +657,14 @@ static void distributeFunctionTypeAttrFromDeclSpec(TypeProcessingState &state, /// Try to apply it somewhere. /// `Attrs` is the attribute list containing the declaration (either of the /// declarator or the declaration). -static void distributeFunctionTypeAttrFromDeclarator(TypeProcessingState &state, - ParsedAttr &attr, - QualType &declSpecType) { +static void distributeFunctionTypeAttrFromDeclarator( + TypeProcessingState &state, ParsedAttr &attr, QualType &declSpecType, + Sema::CUDAFunctionTarget CFT) { Declarator &declarator = state.getDeclarator(); // Try to distribute to the innermost. if (distributeFunctionTypeAttrToInnermost( - state, attr, declarator.getAttributes(), declSpecType)) + state, attr, declarator.getAttributes(), declSpecType, CFT)) return; // If that failed, diagnose the bad attribute when the declarator is @@ -682,7 +686,8 @@ static void distributeFunctionTypeAttrFromDeclarator(TypeProcessingState &state, /// `Attrs` is the attribute list containing the declaration (either of the /// declarator or the declaration). static void distributeTypeAttrsFromDeclarator(TypeProcessingState &state, - QualType &declSpecType) { + QualType &declSpecType, + Sema::CUDAFunctionTarget CFT) { // The called functions in this loop actually remove things from the current // list, so iterating over the existing list isn't possible. Instead, make a // non-owning copy and iterate over that. @@ -699,7 +704,7 @@ static void distributeTypeAttrsFromDeclarator(TypeProcessingState &state, break; FUNCTION_TYPE_ATTRS_CASELIST: - distributeFunctionTypeAttrFromDeclarator(state, attr, declSpecType); + distributeFunctionTypeAttrFromDeclarator(state, attr, declSpecType, CFT); break; MS_TYPE_ATTRS_CASELIST: @@ -3544,7 +3549,8 @@ static QualType GetDeclSpecTypeForDeclarator(TypeProcessingState &state, // Note: We don't need to distribute declaration attributes (i.e. // D.getDeclarationAttributes()) because those are always C++11 attributes, // and those don't get distributed. - distributeTypeAttrsFromDeclarator(state, T); + distributeTypeAttrsFromDeclarator( + state, T, SemaRef.IdentifyCUDATarget(D.getAttributes())); // Find the deduced type in this type. Look in the trailing return type if we // have one, otherwise in the DeclSpec type. @@ -4055,7 +4061,8 @@ static CallingConv getCCForDeclaratorChunk( // function type. We'll diagnose the failure to apply them in // handleFunctionTypeAttr. CallingConv CC; - if (!S.CheckCallingConvAttr(AL, CC) && + if (!S.CheckCallingConvAttr(AL, CC, /*FunctionDecl=*/nullptr, + S.IdentifyCUDATarget(D.getAttributes())) && (!FTI.isVariadic || supportsVariadicCall(CC))) { return CC; } @@ -5727,7 +5734,8 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state, } // See if there are any attributes on this declarator chunk. - processTypeAttrs(state, T, TAL_DeclChunk, DeclType.getAttrs()); + processTypeAttrs(state, T, TAL_DeclChunk, DeclType.getAttrs(), + S.IdentifyCUDATarget(D.getAttributes())); if (DeclType.Kind != DeclaratorChunk::Paren) { if (ExpectNoDerefChunk && !IsNoDerefableChunk(DeclType)) @@ -7801,7 +7809,8 @@ static bool checkMutualExclusion(TypeProcessingState &state, /// Process an individual function attribute. Returns true to /// indicate that the attribute was handled, false if it wasn't. static bool handleFunctionTypeAttr(TypeProcessingState &state, ParsedAttr &attr, - QualType &type) { + QualType &type, + Sema::CUDAFunctionTarget CFT) { Sema &S = state.getSema(); FunctionTypeUnwrapper unwrapped(S, type); @@ -8032,7 +8041,7 @@ static bool handleFunctionTypeAttr(TypeProcessingState &state, ParsedAttr &attr, // Otherwise, a calling convention. CallingConv CC; - if (S.CheckCallingConvAttr(attr, CC)) + if (S.CheckCallingConvAttr(attr, CC, /*FunctionDecl=*/nullptr, CFT)) return true; const FunctionType *fn = unwrapped.get(); @@ -8584,7 +8593,8 @@ static void HandleLifetimeBoundAttr(TypeProcessingState &State, static void processTypeAttrs(TypeProcessingState &state, QualType &type, TypeAttrLocation TAL, - const ParsedAttributesView &attrs) { + const ParsedAttributesView &attrs, + Sema::CUDAFunctionTarget CFT) { state.setParsedNoDeref(false); if (attrs.empty()) @@ -8826,7 +8836,7 @@ static void processTypeAttrs(TypeProcessingState &state, QualType &type, // appertain to and hence should not use the "distribution" logic below. if (attr.isStandardAttributeSyntax() || attr.isRegularKeywordAttribute()) { - if (!handleFunctionTypeAttr(state, attr, type)) { + if (!handleFunctionTypeAttr(state, attr, type, CFT)) { diagnoseBadTypeAttribute(state.getSema(), attr, type); attr.setInvalid(); } @@ -8836,10 +8846,10 @@ static void processTypeAttrs(TypeProcessingState &state, QualType &type, // Never process function type attributes as part of the // declaration-specifiers. if (TAL == TAL_DeclSpec) - distributeFunctionTypeAttrFromDeclSpec(state, attr, type); + distributeFunctionTypeAttrFromDeclSpec(state, attr, type, CFT); // Otherwise, handle the possible delays. - else if (!handleFunctionTypeAttr(state, attr, type)) + else if (!handleFunctionTypeAttr(state, attr, type, CFT)) distributeFunctionTypeAttr(state, attr, type); break; case ParsedAttr::AT_AcquireHandle: { diff --git a/clang/test/CodeGenCUDA/global-initializers.cu b/clang/test/CodeGenCUDA/global-initializers.cu new file mode 100644 index 000000000000000..821260e9c7466ff --- /dev/null +++ b/clang/test/CodeGenCUDA/global-initializers.cu @@ -0,0 +1,51 @@ +// RUN: %clang_cc1 %s -triple x86_64-linux-unknown -emit-llvm -o - \ +// RUN: | FileCheck -check-prefix=HOST %s +// RUN: %clang_cc1 %s -fcuda-is-device \ +// RUN: -emit-llvm -o - -triple nvptx64 \ +// RUN: -aux-triple x86_64-unknown-linux-gnu | FileCheck \ +// RUN: -check-prefix=DEV %s + +#include "Inputs/cuda.h" + +// Check host/device-based overloding resolution in global variable initializer. +double pow(double, double) { return 1.0; } + +__device__ double pow(double, int) { return 2.0; } + +// HOST-DAG: call {{.*}}double @_Z3powdd(double noundef 1.000000e+00, double noundef 1.000000e+00) +double X = pow(1.0, 1); + +constexpr double cpow(double, double) { return 11.0; } + +constexpr __device__ double cpow(double, int) { return 12.0; } + +// HOST-DAG: @CX = global double 1.100000e+01 +double CX = cpow(11.0, 1); + +// DEV-DAG: @CY = addrspace(1) externally_initialized global double 1.200000e+01 +__device__ double CY = cpow(12.0, 1); + +struct A { + double pow(double, double) { return 3.0; } + + __device__ double pow(double, int) { return 4.0; } +}; + +A a; + +// HOST-DAG: call {{.*}}double @_ZN1A3powEdd(ptr {{.*}}@a, double noundef 3.000000e+00, double noundef 1.000000e+00) +double AX = a.pow(3.0, 1); + +struct CA { + constexpr double cpow(double, double) const { return 13.0; } + + constexpr __device__ double cpow(double, int) const { return 14.0; } +}; + +const CA ca; + +// HOST-DAG: @CAX = global double 1.300000e+01 +double CAX = ca.cpow(13.0, 1); + +// DEV-DAG: @CAY = addrspace(1) externally_initialized global double 1.400000e+01 +__device__ double CAY = ca.cpow(14.0, 1); diff --git a/clang/test/SemaCUDA/amdgpu-windows-vectorcall.cu b/clang/test/SemaCUDA/amdgpu-windows-vectorcall.cu index 7636572f69833c4..7ef8a94750b4c47 100644 --- a/clang/test/SemaCUDA/amdgpu-windows-vectorcall.cu +++ b/clang/test/SemaCUDA/amdgpu-windows-vectorcall.cu @@ -1,4 +1,5 @@ // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -aux-triple x86_64-pc-windows-msvc -fms-compatibility -fcuda-is-device -fsyntax-only -verify %s +// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -fms-compatibility -fsyntax-only -verify %s __cdecl void hostf1(); __vectorcall void (*hostf2)() = hostf1; // expected-error {{cannot initialize a variable of type 'void ((*))() __attribute__((vectorcall))' with an lvalue of type 'void () __attribute__((cdecl))'}} diff --git a/clang/test/SemaCUDA/function-overload.cu b/clang/test/SemaCUDA/function-overload.cu index 822e259968206c7..163648cd9a87af8 100644 --- a/clang/test/SemaCUDA/function-overload.cu +++ b/clang/test/SemaCUDA/function-overload.cu @@ -222,7 +222,13 @@ __host__ __device__ void hostdevicef() { // Test for address of overloaded function resolution in the global context. HostFnPtr fp_h = h; HostFnPtr fp_ch = ch; +#if defined (__CUDA_ARCH__) +__device__ +#endif CurrentFnPtr fp_dh = dh; +#if defined (__CUDA_ARCH__) +__device__ +#endif CurrentFnPtr fp_cdh = cdh; GlobalFnPtr fp_g = g; diff --git a/clang/test/SemaCUDA/global-initializers-host.cu b/clang/test/SemaCUDA/global-initializers-host.cu deleted file mode 100644 index 810c6b9777860b0..000000000000000 --- a/clang/test/SemaCUDA/global-initializers-host.cu +++ /dev/null @@ -1,32 +0,0 @@ -// RUN: %clang_cc1 %s --std=c++11 -triple x86_64-linux-unknown -fsyntax-only -o - -verify - -#include "Inputs/cuda.h" - -// Check that we get an error if we try to call a __device__ function from a -// module initializer. - -struct S { - __device__ S() {} - // expected-note@-1 {{'S' declared here}} -}; - -S s; -// expected-error@-1 {{reference to __device__ function 'S' in global initializer}} - -struct T { - __host__ __device__ T() {} -}; -T t; // No error, this is OK. - -struct U { - __host__ U() {} - __device__ U(int) {} - // expected-note@-1 {{'U' declared here}} -}; -U u(42); -// expected-error@-1 {{reference to __device__ function 'U' in global initializer}} - -__device__ int device_fn() { return 42; } -// expected-note@-1 {{'device_fn' declared here}} -int n = device_fn(); -// expected-error@-1 {{reference to __device__ function 'device_fn' in global initializer}} diff --git a/clang/test/SemaCUDA/global-initializers.cu b/clang/test/SemaCUDA/global-initializers.cu new file mode 100644 index 000000000000000..29e386134a3ddc3 --- /dev/null +++ b/clang/test/SemaCUDA/global-initializers.cu @@ -0,0 +1,72 @@ +// RUN: %clang_cc1 %s -triple x86_64-linux-unknown -fsyntax-only -o - -verify +// RUN: %clang_cc1 %s -fcuda-is-device -triple nvptx -fsyntax-only -o - -verify + +#include "Inputs/cuda.h" + +// Check that we get an error if we try to call a __device__ function from a +// module initializer. + +struct S { + // expected-note@-1 {{candidate constructor (the implicit copy constructor) not viable: requires 1 argument, but 0 were provided}} + // expected-note@-2 {{candidate constructor (the implicit move constructor) not viable: requires 1 argument, but 0 were provided}} + __device__ S() {} + // expected-note@-1 {{candidate constructor not viable: call to __device__ function from __host__ function}} +}; + +S s; +// expected-error@-1 {{no matching constructor for initialization of 'S'}} + +struct T { + __host__ __device__ T() {} +}; +T t; // No error, this is OK. + +struct U { + // expected-note@-1 {{candidate constructor (the implicit copy constructor) not viable: no known conversion from 'int' to 'const U' for 1st argument}} + // expected-note@-2 {{candidate constructor (the implicit move constructor) not viable: no known conversion from 'int' to 'U' for 1st argument}} + __host__ U() {} + // expected-note@-1 {{candidate constructor not viable: requires 0 arguments, but 1 was provided}} + __device__ U(int) {} + // expected-note@-1 {{candidate constructor not viable: call to __device__ function from __host__ function}} +}; +U u(42); +// expected-error@-1 {{no matching constructor for initialization of 'U'}} + +__device__ int device_fn() { return 42; } +// expected-note@-1 {{candidate function not viable: call to __device__ function from __host__ function}} +int n = device_fn(); +// expected-error@-1 {{no matching function for call to 'device_fn'}} + +// Check host/device-based overloding resolution in global variable initializer. +double pow(double, double); + +__device__ double pow(double, int); + +double X = pow(1.0, 1); +__device__ double Y = pow(2.0, 2); // expected-error{{dynamic initialization is not supported for __device__, __constant__, __shared__, and __managed__ variables}} + +constexpr double cpow(double, double) { return 1.0; } + +constexpr __device__ double cpow(double, int) { return 2.0; } + +const double CX = cpow(1.0, 1); +const __device__ double CY = cpow(2.0, 2); + +struct A { + double pow(double, double); + + __device__ double pow(double, int); + + constexpr double cpow(double, double) const { return 1.0; } + + constexpr __device__ double cpow(double, int) const { return 1.0; } + +}; + +A a; +double AX = a.pow(1.0, 1); +__device__ double AY = a.pow(2.0, 2); // expected-error{{dynamic initialization is not supported for __device__, __constant__, __shared__, and __managed__ variables}} + +const A ca; +const double CAX = ca.cpow(1.0, 1); +const __device__ double CAY = ca.cpow(2.0, 2); diff --git a/clang/test/SemaCUDA/windows-calling-conv.cu b/clang/test/SemaCUDA/windows-calling-conv.cu new file mode 100644 index 000000000000000..0786f65bddb8e82 --- /dev/null +++ b/clang/test/SemaCUDA/windows-calling-conv.cu @@ -0,0 +1,17 @@ +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -aux-triple \ +// RUN: x86_64-pc-windows-msvc -fms-compatibility -fcuda-is-device \ +// RUN: -fsyntax-only -verify -x hip %s +// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -fms-compatibility \ +// RUN: -fsyntax-only -verify -x hip %s + +// expected-no-diagnostics + +typedef void (__stdcall* funcTy)(); +void invoke(funcTy f); + +static void __stdcall callee() noexcept { +} + +void foo() { + invoke(callee); +} _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits