hliao updated this revision to Diff 228924. hliao added a comment. This patch is revived with more changes addressing the previous concerns.
Back to Justin's example: __host__ float bar(); __device__ int bar(); __host__ __device__ auto foo() -> decltype(bar()) { return bar(); } Even without this patch, that example already compiles without any errors or warnings, for example with: clang -std=c++11 -x cuda -nocudainc -nocudalib --cuda-gpu-arch=sm_60 --cuda-device-only -S -emit-llvm -O3 foo.cu In C++14, that example can be simplified even further by dropping `decltype`, but it has the same ambiguity. __host__ float bar(); __device__ int bar(); __host__ __device__ auto foo() { return bar(); } Without any change, clang compiles this code as well and uses different return types between host-side and device-side compilation.[^1] [^1]: The first example has the same return type between host-side and device-side compilation, but that seems incorrect or unreasonable to me. The ambiguity issue is in fact not introduced by relaxing `decltype`. It is inherent, since we allow overloading on target attributes. Issuing warnings instead of errors seems more reasonable to me for such cases. Besides relaxing the CUDA call rule under `decltype`, this patch also generates a warning during overload resolution if there is more than one candidate with different return types. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D61458/new/ https://reviews.llvm.org/D61458 Files: clang/include/clang/Basic/DiagnosticSemaKinds.td clang/include/clang/Sema/Sema.h clang/lib/Sema/SemaOverload.cpp clang/test/CodeGenCUDA/function-overload.cu clang/test/Misc/warning-flags.c clang/test/SemaCUDA/function-overload.cu
Index: clang/test/SemaCUDA/function-overload.cu =================================================================== --- clang/test/SemaCUDA/function-overload.cu +++ clang/test/SemaCUDA/function-overload.cu @@ -3,6 +3,8 @@ // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsyntax-only -verify %s // RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fsyntax-only -fcuda-is-device -verify %s +// RUN: %clang_cc1 -std=c++11 -DCHECK_DECLTYPE -triple x86_64-unknown-linux-gnu -fsyntax-only -verify %s +// RUN: %clang_cc1 -std=c++11 -DCHECK_DECLTYPE -triple nvptx64-nvidia-cuda -fsyntax-only -fcuda-is-device -verify %s #include "Inputs/cuda.h" @@ -419,3 +421,30 @@ int test_constexpr_overload(C2 &x, C2 &y) { return constexpr_overload(x, y); } + +#if defined(CHECK_DECLTYPE) +#if defined(__CUDA_ARCH__) +// expected-note@+6 {{other definition of 't0'}} +// expected-note@+6 {{use this definition of 't0'}} +#else +// expected-note@+3 {{use this definition of 't0'}} +// expected-note@+3 {{other definition of 't0'}} +#endif +__host__ float t0(); +__device__ int t0(); + +__host__ __device__ void dt0() { + // expected-warning@+1 {{return type of 't0' in 'decltype' is ambiguous and may not be expected}} + decltype(t0()) ret; +} + +__host__ float t1(); + +__device__ void dt1() { + decltype(t1()) ret; // OK. `decltype` is relaxed. +} + +__host__ __device__ void dt2() { + decltype(t1()) ret; // OK. `decltype` is relaxed. +} +#endif Index: clang/test/Misc/warning-flags.c =================================================================== --- clang/test/Misc/warning-flags.c +++ clang/test/Misc/warning-flags.c @@ -18,7 +18,7 @@ The list of warnings below should NEVER grow. It should gradually shrink to 0. 
-CHECK: Warnings without flags (74): +CHECK: Warnings without flags (75): CHECK-NEXT: ext_excess_initializers CHECK-NEXT: ext_excess_initializers_in_char_array_initializer CHECK-NEXT: ext_expected_semi_decl_list @@ -47,6 +47,7 @@ CHECK-NEXT: warn_conv_to_base_not_used CHECK-NEXT: warn_conv_to_self_not_used CHECK-NEXT: warn_conv_to_void_not_used +CHECK-NEXT: warn_decltype_ambiguous_return_type CHECK-NEXT: warn_delete_array_type CHECK-NEXT: warn_double_const_requires_fp64 CHECK-NEXT: warn_drv_assuming_mfloat_abi_is Index: clang/test/CodeGenCUDA/function-overload.cu =================================================================== --- clang/test/CodeGenCUDA/function-overload.cu +++ clang/test/CodeGenCUDA/function-overload.cu @@ -8,6 +8,8 @@ // RUN: | FileCheck -check-prefix=CHECK-BOTH -check-prefix=CHECK-HOST %s // RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device -emit-llvm -o - %s \ // RUN: | FileCheck -check-prefix=CHECK-BOTH -check-prefix=CHECK-DEVICE %s +// RUN: %clang_cc1 -std=c++11 -DCHECK_DECLTYPE -triple amdgcn -fcuda-is-device -emit-llvm -o - %s \ +// RUN: | FileCheck -check-prefix=CHECK-DECLTYPE %s #include "Inputs/cuda.h" @@ -53,3 +55,14 @@ // CHECK-BOTH: define linkonce_odr void @_ZN7s_cd_hdD2Ev( // CHECK-BOTH: store i32 32, // CHECK-BOTH: ret void + +#if defined(CHECK_DECLTYPE) +int foo(float); +// CHECK-DECLTYPE-LABEL: @_Z3barf +// CHECK-DECLTYPE: fptosi +// CHECK-DECLTYPE: sitofp +__device__ float bar(float x) { + decltype(foo(x)) y = x; + return y + 3.f; +} +#endif Index: clang/lib/Sema/SemaOverload.cpp =================================================================== --- clang/lib/Sema/SemaOverload.cpp +++ clang/lib/Sema/SemaOverload.cpp @@ -6237,7 +6237,7 @@ // case we may not yet know what the member's target is; the target is // inferred for the member automatically, based on the bases and fields of // the class. 
- if (!Caller->isImplicit() && !IsAllowedCUDACall(Caller, Function)) { + if (!Caller->isImplicit() && !isCUDACallAllowed(Function)) { Candidate.Viable = false; Candidate.FailureKind = ovl_fail_bad_target; return; @@ -6753,7 +6753,7 @@ // (CUDA B.1): Check for invalid calls between targets. if (getLangOpts().CUDA) if (const FunctionDecl *Caller = dyn_cast<FunctionDecl>(CurContext)) - if (!IsAllowedCUDACall(Caller, Method)) { + if (!isCUDACallAllowed(Method)) { Candidate.Viable = false; Candidate.FailureKind = ovl_fail_bad_target; return; @@ -9673,6 +9673,28 @@ if (Best->Function && Best->Function->isDeleted()) return OR_Deleted; + // Issue a warning of return type resolution under `decltype`. + if (S.getLangOpts().CUDA && Best->Function && S.underDecltypeContext()) { + SmallVector<const OverloadCandidate *, 16> AmbiSet; + QualType BestReturnType = Best->Function->getReturnType(); + for (auto &Cand : this->Candidates) { + if (!Cand.Viable || !Cand.Function) + continue; + if (BestReturnType != Cand.Function->getReturnType()) + AmbiSet.push_back(&Cand); + } + if (!AmbiSet.empty()) { + S.Diag(Loc, diag::warn_decltype_ambiguous_return_type) << Best->Function; + S.Diag(Best->Function->getLocation(), + diag::note_decltype_ambiguous_function_chosen) + << Best->Function; + for (auto C : AmbiSet) + S.Diag(C->Function->getLocation(), + diag::note_decltype_ambiguous_function_other) + << C->Function; + } + } + if (!EquivalentCands.empty()) S.diagnoseEquivalentInternalLinkageDeclarations(Loc, Best->Function, EquivalentCands); @@ -11491,7 +11513,7 @@ if (FunctionDecl *FunDecl = dyn_cast<FunctionDecl>(Fn)) { if (S.getLangOpts().CUDA) if (FunctionDecl *Caller = dyn_cast<FunctionDecl>(S.CurContext)) - if (!Caller->isImplicit() && !S.IsAllowedCUDACall(Caller, FunDecl)) + if (!Caller->isImplicit() && !S.isCUDACallAllowed(FunDecl)) return false; if (FunDecl->isMultiVersion()) { const auto *TA = FunDecl->getAttr<TargetAttr>(); Index: clang/include/clang/Sema/Sema.h 
=================================================================== --- clang/include/clang/Sema/Sema.h +++ clang/include/clang/Sema/Sema.h @@ -8102,6 +8102,14 @@ return ExprEvalContexts.back().isUnevaluated(); } + bool underDecltypeContext() const { + return llvm::any_of(ExprEvalContexts, + [](const ExpressionEvaluationContextRecord &C) { + return C.ExprContext == + ExpressionEvaluationContextRecord::EK_Decltype; + }); + } + /// RAII class used to determine whether SFINAE has /// trapped any errors that occur during template argument /// deduction. @@ -10953,14 +10961,18 @@ CUDAFunctionPreference IdentifyCUDAPreference(const FunctionDecl *Caller, const FunctionDecl *Callee); - /// Determines whether Caller may invoke Callee, based on their CUDA - /// host/device attributes. Returns false if the call is not allowed. + /// Determines, under the current context, whether Callee may be invokable, + /// based on their CUDA host/device attributes. Returns false if the call is + /// not allowed. /// /// Note: Will return true for CFP_WrongSide calls. These may appear in /// semantically correct CUDA programs, but only if they're never codegen'ed. - bool IsAllowedCUDACall(const FunctionDecl *Caller, - const FunctionDecl *Callee) { - return IdentifyCUDAPreference(Caller, Callee) != CFP_Never; + bool isCUDACallAllowed(const FunctionDecl *Callee) { + // Under `decltype`, the rule is relaxed. 
+ if (underDecltypeContext()) + return true; + return IdentifyCUDAPreference(dyn_cast<FunctionDecl>(CurContext), Callee) != + CFP_Never; } /// May add implicit CUDAHostAttr and CUDADeviceAttr attributes to FD, Index: clang/include/clang/Basic/DiagnosticSemaKinds.td =================================================================== --- clang/include/clang/Basic/DiagnosticSemaKinds.td +++ clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -7496,6 +7496,12 @@ "cannot overload %select{__device__|__global__|__host__|__host__ __device__}2 function %3">; def note_cuda_ovl_candidate_target_mismatch : Note< "candidate template ignored: target attributes do not match">; +def warn_decltype_ambiguous_return_type : Warning< + "return type of %0 in 'decltype' is ambiguous and may not be expected">; +def note_decltype_ambiguous_function_chosen : Note< + "use this definition of %0">; +def note_decltype_ambiguous_function_other : Note< + "other definition of %0">; def warn_non_pod_vararg_with_format_string : Warning< "cannot pass %select{non-POD|non-trivial}0 object of type %1 to variadic "
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits