hliao created this revision. hliao added reviewers: kzhuravl, yaxunl. hliao added a project: clang. Herald added subscribers: llvm-commits, hiraditya. Herald added a project: LLVM.
[D56411 <https://reviews.llvm.org/D56411>] Temp solution fixing CUDA template issue - template with overloadable kernel function as the template function need revising CheckCUDACall checking. [SelectionDAG] Harden the checking of RegClass when adding operand - If the operand index is out-of-range, expect nullptr is returned. [AMDGPU] Allow using integral non-type template parameters - Allow using integral non-type template parameters in the following attributes __attribute__((amdgpu_flat_work_group_size(<min>, <max>))) __attribute__((amdgpu_waves_per_eu(<min>[, <max>]))) Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D58627 Files: .gitignore clang/include/clang/Basic/Attr.td clang/include/clang/Sema/Sema.h clang/lib/CodeGen/TargetInfo.cpp clang/lib/Sema/SemaCUDA.cpp clang/lib/Sema/SemaDeclAttr.cpp clang/lib/Sema/SemaTemplate.cpp clang/lib/Sema/SemaTemplateInstantiateDecl.cpp clang/test/SemaCUDA/amdgpu-attrs.cu clang/test/SemaCUDA/kernel-template-with-func-arg.cu clang/test/SemaOpenCL/amdgpu-attrs.cl llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
Index: llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -398,8 +398,9 @@ const TargetRegisterClass *OpRC = TLI->isTypeLegal(OpVT) ? TLI->getRegClassFor(OpVT) : nullptr; const TargetRegisterClass *IIRC = - II ? TRI->getAllocatableClass(TII->getRegClass(*II, IIOpNum, TRI, *MF)) - : nullptr; + II ? TII->getRegClass(*II, IIOpNum, TRI, *MF) : nullptr; + assert(!II || IIOpNum < II->getNumOperands() || !IIRC); + IIRC = TRI->getAllocatableClass(IIRC); if (OpRC && IIRC && OpRC != IIRC && TargetRegisterInfo::isVirtualRegister(VReg)) { Index: clang/test/SemaOpenCL/amdgpu-attrs.cl =================================================================== --- clang/test/SemaOpenCL/amdgpu-attrs.cl +++ clang/test/SemaOpenCL/amdgpu-attrs.cl @@ -27,12 +27,12 @@ __attribute__((amdgpu_num_sgpr(32))) void func_num_sgpr_32() {} // expected-error {{'amdgpu_num_sgpr' attribute only applies to kernel functions}} __attribute__((amdgpu_num_vgpr(64))) void func_num_vgpr_64() {} // expected-error {{'amdgpu_num_vgpr' attribute only applies to kernel functions}} -__attribute__((amdgpu_flat_work_group_size("ABC", "ABC"))) kernel void kernel_flat_work_group_size_ABC_ABC() {} // expected-error {{'amdgpu_flat_work_group_size' attribute requires an integer constant}} -__attribute__((amdgpu_flat_work_group_size(32, "ABC"))) kernel void kernel_flat_work_group_size_32_ABC() {} // expected-error {{'amdgpu_flat_work_group_size' attribute requires an integer constant}} -__attribute__((amdgpu_flat_work_group_size("ABC", 64))) kernel void kernel_flat_work_group_size_ABC_64() {} // expected-error {{'amdgpu_flat_work_group_size' attribute requires an integer constant}} -__attribute__((amdgpu_waves_per_eu("ABC"))) kernel void kernel_waves_per_eu_ABC() {} // expected-error {{'amdgpu_waves_per_eu' attribute requires an integer constant}} -__attribute__((amdgpu_waves_per_eu(2, "ABC"))) kernel void kernel_waves_per_eu_2_ABC() {} // expected-error {{'amdgpu_waves_per_eu' attribute requires an integer constant}} -__attribute__((amdgpu_waves_per_eu("ABC", 4))) kernel void kernel_waves_per_eu_ABC_4() {} // expected-error {{'amdgpu_waves_per_eu' attribute requires an integer constant}} +__attribute__((amdgpu_flat_work_group_size("ABC", "ABC"))) kernel void kernel_flat_work_group_size_ABC_ABC() {} // expected-error {{'amdgpu_flat_work_group_size' attribute requires parameter 0 to be an integer constant}} +__attribute__((amdgpu_flat_work_group_size(32, "ABC"))) kernel void kernel_flat_work_group_size_32_ABC() {} // expected-error {{'amdgpu_flat_work_group_size' attribute requires parameter 1 to be an integer constant}} +__attribute__((amdgpu_flat_work_group_size("ABC", 64))) kernel void kernel_flat_work_group_size_ABC_64() {} // expected-error {{'amdgpu_flat_work_group_size' attribute requires parameter 0 to be an integer constant}} +__attribute__((amdgpu_waves_per_eu("ABC"))) kernel void kernel_waves_per_eu_ABC() {} // expected-error {{'amdgpu_waves_per_eu' attribute requires parameter 0 to be an integer constant}} +__attribute__((amdgpu_waves_per_eu(2, "ABC"))) kernel void kernel_waves_per_eu_2_ABC() {} // expected-error {{'amdgpu_waves_per_eu' attribute requires parameter 1 to be an integer constant}} +__attribute__((amdgpu_waves_per_eu("ABC", 4))) kernel void kernel_waves_per_eu_ABC_4() {} // expected-error {{'amdgpu_waves_per_eu' attribute requires parameter 0 to be an integer constant}} __attribute__((amdgpu_num_sgpr("ABC"))) kernel void kernel_num_sgpr_ABC() {} // expected-error {{'amdgpu_num_sgpr' attribute requires an integer constant}} __attribute__((amdgpu_num_vgpr("ABC"))) kernel void kernel_num_vgpr_ABC() {} // expected-error {{'amdgpu_num_vgpr' attribute requires an integer constant}} Index: clang/test/SemaCUDA/kernel-template-with-func-arg.cu =================================================================== --- /dev/null +++ clang/test/SemaCUDA/kernel-template-with-func-arg.cu @@ -0,0 +1,57 @@ +// RUN: %clang_cc1 -fsyntax-only -verify %s + +#include "Inputs/cuda.h" + +struct C { + __device__ void devfun() {} + void hostfun() {} + template<class T> __device__ void devtempfun() {} + __device__ __host__ void devhostfun() {} +}; + +__device__ void devfun() {} +__host__ void hostfun() {} +template<class T> __device__ void devtempfun() {} +__device__ __host__ void devhostfun() {} + +template <void (*devF)()> __global__ void kernel() { devF();} +template <typename T, void(T::*devF)()> __global__ void kernel2(T *p) { (p->*devF)(); } + +template<> __global__ void kernel<devfun>(); +template<> __global__ void kernel<hostfun>(); // expected-error {{no function template matches function template specialization 'kernel'}} + // expected-note@-5 {{candidate template ignored: invalid explicitly-specified argument for template parameter 'devF'}} +template<> __global__ void kernel<devtempfun<int> >(); +template<> __global__ void kernel<devhostfun>(); + +template<> __global__ void kernel<&devfun>(); +template<> __global__ void kernel<&hostfun>(); // expected-error {{no function template matches function template specialization 'kernel'}} + // expected-note@-11 {{candidate template ignored: invalid explicitly-specified argument for template parameter 'devF'}} +template<> __global__ void kernel<&devtempfun<int> >(); +template<> __global__ void kernel<&devhostfun>(); + +template<> __global__ void kernel2<C, &C::devfun>(C *p); +template<> __global__ void kernel2<C, &C::hostfun>(C *p); // expected-error {{no function template matches function template specialization 'kernel2'}} + // expected-note@-16 {{candidate template ignored: invalid explicitly-specified argument for template parameter 'devF'}} +template<> __global__ void kernel2<C, &C::devtempfun<int> >(C *p); +template<> __global__ void kernel2<C, &C::devhostfun>(C *p); + +void fun() { + kernel<&devfun><<<1,1>>>(); + kernel<&hostfun><<<1,1>>>(); // expected-error {{no matching function for call to 'kernel'}} + // expected-note@-24 {{candidate template ignored: invalid explicitly-specified argument for template parameter 'devF'}} + kernel<&devtempfun<int> ><<<1,1>>>(); + kernel<&devhostfun><<<1,1>>>(); + + kernel<devfun><<<1,1>>>(); + kernel<hostfun><<<1,1>>>(); // expected-error {{no matching function for call to 'kernel'}} + // expected-note@-30 {{candidate template ignored: invalid explicitly-specified argument for template parameter 'devF'}} + kernel<devtempfun<int> ><<<1,1>>>(); + kernel<devhostfun><<<1,1>>>(); + + C a; + kernel2<C, &C::devfun><<<1,1>>>(&a); + kernel2<C, &C::hostfun><<<1,1>>>(&a); // expected-error {{no matching function for call to 'kernel2'}} + // expected-note@-36 {{candidate template ignored: invalid explicitly-specified argument for template parameter 'devF'}} + kernel2<C, &C::devtempfun<int> ><<<1,1>>>(&a); + kernel2<C, &C::devhostfun><<<1,1>>>(&a); +} Index: clang/test/SemaCUDA/amdgpu-attrs.cu =================================================================== --- clang/test/SemaCUDA/amdgpu-attrs.cu +++ clang/test/SemaCUDA/amdgpu-attrs.cu @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsyntax-only -verify %s +// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify %s #include "Inputs/cuda.h" @@ -78,3 +78,119 @@ // expected-error@+2{{attribute 'intel_reqd_sub_group_size' can only be applied to an OpenCL kernel function}} __attribute__((intel_reqd_sub_group_size(64))) __global__ void intel_reqd_sub_group_size_64() {} + +// expected-error@+1{{'amdgpu_flat_work_group_size' attribute requires parameter 0 to be an integer constant}} +__attribute__((amdgpu_flat_work_group_size("32", 64))) +__global__ void non_int_min_flat_work_group_size_32_64() {} +// expected-error@+1{{'amdgpu_flat_work_group_size' attribute requires parameter 1 to be an integer constant}} +__attribute__((amdgpu_flat_work_group_size(32, "64"))) +__global__ void non_int_max_flat_work_group_size_32_64() {} + +int nc_min = 32, nc_max = 64; +// expected-error@+1{{'amdgpu_flat_work_group_size' attribute requires parameter 0 to be an integer constant}} +__attribute__((amdgpu_flat_work_group_size(nc_min, 64))) +__global__ void non_cint_min_flat_work_group_size_32_64() {} +// expected-error@+1{{'amdgpu_flat_work_group_size' attribute requires parameter 1 to be an integer constant}} +__attribute__((amdgpu_flat_work_group_size(32, nc_max))) +__global__ void non_cint_max_flat_work_group_size_32_64() {} + +const int c_min = 16, c_max = 32; +__attribute__((amdgpu_flat_work_group_size(c_min * 2, 64))) +__global__ void cint_min_flat_work_group_size_32_64() {} +__attribute__((amdgpu_flat_work_group_size(32, c_max * 2))) +__global__ void cint_max_flat_work_group_size_32_64() {} + +// expected-error@+3{{'T' does not refer to a value}} +// expected-note@+1{{declared here}} +template<typename T> +__attribute__((amdgpu_flat_work_group_size(T, 64))) +__global__ void template_class_min_flat_work_group_size_32_64() {} +// expected-error@+3{{'T' does not refer to a value}} +// expected-note@+1{{declared here}} +template<typename T> +__attribute__((amdgpu_flat_work_group_size(32, T))) +__global__ void template_class_max_flat_work_group_size_32_64() {} + +template<unsigned a, unsigned b> +__attribute__((amdgpu_flat_work_group_size(a, b))) +__global__ void template_flat_work_group_size_32_64() {} +template __global__ void template_flat_work_group_size_32_64<32, 64>(); + +template<unsigned a, unsigned b, unsigned c> +__attribute__((amdgpu_flat_work_group_size(a + b, b + c))) +__global__ void template_complex_flat_work_group_size_32_64() {} +template __global__ void template_complex_flat_work_group_size_32_64<16, 16, 48>(); + +unsigned ipow2(unsigned n) { return n == 0 ? 1 : 2 * ipow2(n - 1); } +constexpr unsigned ce_ipow2(unsigned n) { return n == 0 ? 1 : 2 * ce_ipow2(n - 1); } + +__attribute__((amdgpu_flat_work_group_size(ce_ipow2(5), ce_ipow2(6)))) +__global__ void cexpr_flat_work_group_size_32_64() {} +// expected-error@+1{{'amdgpu_flat_work_group_size' attribute requires parameter 0 to be an integer constant}} +__attribute__((amdgpu_flat_work_group_size(ipow2(5), 64))) +__global__ void non_cexpr_min_flat_work_group_size_32_64() {} +// expected-error@+1{{'amdgpu_flat_work_group_size' attribute requires parameter 1 to be an integer constant}} +__attribute__((amdgpu_flat_work_group_size(32, ipow2(6)))) +__global__ void non_cexpr_max_flat_work_group_size_32_64() {} + +// expected-error@+1{{'amdgpu_waves_per_eu' attribute requires parameter 0 to be an integer constant}} +__attribute__((amdgpu_waves_per_eu("2"))) +__global__ void non_int_min_waves_per_eu_2() {} +// expected-error@+1{{'amdgpu_waves_per_eu' attribute requires parameter 1 to be an integer constant}} +__attribute__((amdgpu_waves_per_eu(2, "4"))) +__global__ void non_int_max_waves_per_eu_2_4() {} + +// expected-error@+1{{'amdgpu_waves_per_eu' attribute requires parameter 0 to be an integer constant}} +__attribute__((amdgpu_waves_per_eu(nc_min))) +__global__ void non_cint_min_waves_per_eu_2() {} +// expected-error@+1{{'amdgpu_waves_per_eu' attribute requires parameter 1 to be an integer constant}} +__attribute__((amdgpu_waves_per_eu(2, nc_max))) +__global__ void non_cint_min_waves_per_eu_2_4() {} + +__attribute__((amdgpu_waves_per_eu(c_min / 8))) +__global__ void cint_min_waves_per_eu_2() {} +__attribute__((amdgpu_waves_per_eu(c_min / 8, c_max / 8))) +__global__ void cint_min_waves_per_eu_2_4() {} + +// expected-error@+3{{'T' does not refer to a value}} +// expected-note@+1{{declared here}} +template<typename T> +__attribute__((amdgpu_waves_per_eu(T))) +__global__ void cint_min_waves_per_eu_2() {} +// expected-error@+3{{'T' does not refer to a value}} +// expected-note@+1{{declared here}} +template<typename T> +__attribute__((amdgpu_waves_per_eu(2, T))) +__global__ void cint_min_waves_per_eu_2_4() {} + +template<unsigned a> +__attribute__((amdgpu_waves_per_eu(a))) +__global__ void template_waves_per_eu_2() {} +template __global__ void template_waves_per_eu_2<2>(); + +template<unsigned a, unsigned b> +__attribute__((amdgpu_waves_per_eu(a, b))) +__global__ void template_waves_per_eu_2_4() {} +template __global__ void template_waves_per_eu_2_4<2, 4>(); + +template<unsigned a, unsigned b, unsigned c> +__attribute__((amdgpu_waves_per_eu(a + b, c - b))) +__global__ void template_complex_waves_per_eu_2_4() {} +template __global__ void template_complex_waves_per_eu_2_4<1, 1, 5>(); + +// expected-error@+2{{expression contains unexpanded parameter pack 'Args'}} +template<unsigned... Args> +__attribute__((amdgpu_waves_per_eu(Args))) +__global__ void template_waves_per_eu_2() {} +template __global__ void template_waves_per_eu_2<2, 4>(); + +__attribute__((amdgpu_waves_per_eu(ce_ipow2(1)))) +__global__ void cexpr_waves_per_eu_2() {} +__attribute__((amdgpu_waves_per_eu(ce_ipow2(1), ce_ipow2(2)))) +__global__ void cexpr_waves_per_eu_2_4() {} +// expected-error@+1{{'amdgpu_waves_per_eu' attribute requires parameter 0 to be an integer constant}} +__attribute__((amdgpu_waves_per_eu(ipow2(1)))) +__global__ void non_cexpr_waves_per_eu_2() {} +// expected-error@+1{{'amdgpu_waves_per_eu' attribute requires parameter 1 to be an integer constant}} +__attribute__((amdgpu_waves_per_eu(2, ipow2(2)))) +__global__ void non_cexpr_waves_per_eu_2_4() {} Index: clang/lib/Sema/SemaTemplateInstantiateDecl.cpp =================================================================== --- clang/lib/Sema/SemaTemplateInstantiateDecl.cpp +++ clang/lib/Sema/SemaTemplateInstantiateDecl.cpp @@ -344,6 +344,51 @@ Attr.getRange()); } +static void instantiateDependentAMDGPUFlatWorkGroupSizeAttr( + Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs, + const AMDGPUFlatWorkGroupSizeAttr &Attr, Decl *New) { + // Both min and max expression are constant expressions. + EnterExpressionEvaluationContext Unevaluated( + S, Sema::ExpressionEvaluationContext::ConstantEvaluated); + + ExprResult Result = S.SubstExpr(Attr.getMin(), TemplateArgs); + if (Result.isInvalid()) + return; + Expr *MinExpr = Result.getAs<Expr>(); + + Result = S.SubstExpr(Attr.getMax(), TemplateArgs); + if (Result.isInvalid()) + return; + Expr *MaxExpr = Result.getAs<Expr>(); + + S.addAMDGPUFlatWorkGroupSizeAttr(Attr.getLocation(), New, MinExpr, MaxExpr, + Attr.getSpellingListIndex()); +} + +static void instantiateDependentAMDGPUWavesPerEUAttr( + Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs, + const AMDGPUWavesPerEUAttr &Attr, Decl *New) { + // Both min and max expression are constant expressions. + EnterExpressionEvaluationContext Unevaluated( + S, Sema::ExpressionEvaluationContext::ConstantEvaluated); + + ExprResult Result = S.SubstExpr(Attr.getMin(), TemplateArgs); + if (Result.isInvalid()) + return; + Expr *MinExpr = Result.getAs<Expr>(); + + Expr *MaxExpr = nullptr; + if (auto Max = Attr.getMax()) { + Result = S.SubstExpr(Max, TemplateArgs); + if (Result.isInvalid()) + return; + MaxExpr = Result.getAs<Expr>(); + } + + S.addAMDGPUWavesPerEUAttr(Attr.getLocation(), New, MinExpr, MaxExpr, + Attr.getSpellingListIndex()); +} + void Sema::InstantiateAttrsForDecl( const MultiLevelTemplateArgumentList &TemplateArgs, const Decl *Tmpl, Decl *New, LateInstantiatedAttrVec *LateAttrs, @@ -437,6 +482,18 @@ continue; } + if (const AMDGPUFlatWorkGroupSizeAttr *AMDGPUFlatWorkGroupSize = + dyn_cast<AMDGPUFlatWorkGroupSizeAttr>(TmplAttr)) { + instantiateDependentAMDGPUFlatWorkGroupSizeAttr( + *this, TemplateArgs, *AMDGPUFlatWorkGroupSize, New); + } + + if (const AMDGPUWavesPerEUAttr *AMDGPUFlatWorkGroupSize = + dyn_cast<AMDGPUWavesPerEUAttr>(TmplAttr)) { + instantiateDependentAMDGPUWavesPerEUAttr(*this, TemplateArgs, + *AMDGPUFlatWorkGroupSize, New); + } + // Existing DLL attribute on the instantiation takes precedence. if (TmplAttr->getKind() == attr::DLLExport || TmplAttr->getKind() == attr::DLLImport) { Index: clang/lib/Sema/SemaTemplate.cpp =================================================================== --- clang/lib/Sema/SemaTemplate.cpp +++ clang/lib/Sema/SemaTemplate.cpp @@ -4555,6 +4555,7 @@ EnterExpressionEvaluationContext ConstantEvaluated( SemaRef, Sema::ExpressionEvaluationContext::ConstantEvaluated); + SemaRef.ExprEvalContexts.back().Template = Template; return SemaRef.SubstExpr(Param->getDefaultArgument(), TemplateArgLists); } @@ -4805,8 +4806,8 @@ TemplateArgument Result; unsigned CurSFINAEErrors = NumSFINAEErrors; ExprResult Res = - CheckTemplateArgument(NTTP, NTTPType, Arg.getArgument().getAsExpr(), - Result, CTAK); + CheckTemplateArgument(NTTP, NTTPType, Arg.getArgument().getAsExpr(), + Result, CTAK, dyn_cast<TemplateDecl>(Template)); if (Res.isInvalid()) return true; // If the current template argument causes an error, give up now. @@ -6175,6 +6176,22 @@ return true; } +namespace { +FunctionDecl *GetFunctionDecl(Expr *Arg) { + Expr *E = Arg; + if (UnaryOperator *UO = dyn_cast<UnaryOperator>(E)) { + E = UO ? UO->getSubExpr() : nullptr; + } + if (DeclRefExpr *DRE = dyn_cast_or_null<DeclRefExpr>(E)) { + ValueDecl *Entity = DRE ? DRE->getDecl() : nullptr; + if (Entity) { + if (auto Callee = dyn_cast<FunctionDecl>(Entity)) + return Callee; + } + } + return nullptr; +} +} // namespace /// Check a template argument against its corresponding /// non-type template parameter. /// @@ -6185,7 +6202,8 @@ ExprResult Sema::CheckTemplateArgument(NonTypeTemplateParmDecl *Param, QualType ParamType, Expr *Arg, TemplateArgument &Converted, - CheckTemplateArgumentKind CTAK) { + CheckTemplateArgumentKind CTAK, + TemplateDecl *Template) { SourceLocation StartLoc = Arg->getBeginLoc(); // If the parameter type somehow involves auto, deduce the type now. @@ -6272,6 +6290,7 @@ // a constant-evaluated context. EnterExpressionEvaluationContext ConstantEvaluated( *this, Sema::ExpressionEvaluationContext::ConstantEvaluated); + ExprEvalContexts.back().Template = Template; if (getLangOpts().CPlusPlus17) { // C++17 [temp.arg.nontype]p1: @@ -6592,6 +6611,10 @@ return ExprError(); } + if (auto *FD = GetFunctionDecl(Arg)) + if (getLangOpts().CUDA && !CheckCUDACall(Arg->getBeginLoc(), FD)) + return ExprError(); + if (!ParamType->isMemberPointerType()) { if (CheckTemplateArgumentAddressOfObjectOrFunction(*this, Param, ParamType, Index: clang/lib/Sema/SemaDeclAttr.cpp =================================================================== --- clang/lib/Sema/SemaDeclAttr.cpp +++ clang/lib/Sema/SemaDeclAttr.cpp @@ -245,11 +245,11 @@ !Expr->isIntegerConstantExpr(I, S.Context)) { if (Idx != UINT_MAX) S.Diag(getAttrLoc(AI), diag::err_attribute_argument_n_type) - << AI << Idx << AANT_ArgumentIntegerConstant + << &AI << Idx << AANT_ArgumentIntegerConstant << Expr->getSourceRange(); else S.Diag(getAttrLoc(AI), diag::err_attribute_argument_type) - << AI << AANT_ArgumentIntegerConstant << Expr->getSourceRange(); + << &AI << AANT_ArgumentIntegerConstant << Expr->getSourceRange(); return false; } @@ -261,7 +261,7 @@ if (StrictlyUnsigned && I.isSigned() && I.isNegative()) { S.Diag(getAttrLoc(AI), diag::err_attribute_requires_positive_integer) - << AI << /*non-negative*/ 1; + << &AI << /*non-negative*/ 1; return false; } @@ -5853,57 +5853,115 @@ } } -static void handleAMDGPUFlatWorkGroupSizeAttr(Sema &S, Decl *D, - const ParsedAttr &AL) { +static bool +checkAMDGPUFlatWorkGroupSizeArguments(Sema &S, Expr *MinExpr, Expr *MaxExpr, + const AMDGPUFlatWorkGroupSizeAttr &Attr) { + // Accept template arguments for now as they depend on something else. + // We'll get to check them when they eventually get instantiated. + if (MinExpr->isValueDependent() || MaxExpr->isValueDependent()) + return false; + uint32_t Min = 0; - Expr *MinExpr = AL.getArgAsExpr(0); - if (!checkUInt32Argument(S, AL, MinExpr, Min)) - return; + if (!checkUInt32Argument(S, Attr, MinExpr, Min, 0)) + return true; uint32_t Max = 0; - Expr *MaxExpr = AL.getArgAsExpr(1); - if (!checkUInt32Argument(S, AL, MaxExpr, Max)) - return; + if (!checkUInt32Argument(S, Attr, MaxExpr, Max, 1)) + return true; if (Min == 0 && Max != 0) { - S.Diag(AL.getLoc(), diag::err_attribute_argument_invalid) << AL << 0; - return; + S.Diag(Attr.getLocation(), diag::err_attribute_argument_invalid) + << &Attr << 0; + return true; } if (Min > Max) { - S.Diag(AL.getLoc(), diag::err_attribute_argument_invalid) << AL << 1; - return; + S.Diag(Attr.getLocation(), diag::err_attribute_argument_invalid) + << &Attr << 1; + return true; } - D->addAttr(::new (S.Context) - AMDGPUFlatWorkGroupSizeAttr(AL.getLoc(), S.Context, Min, Max, - AL.getAttributeSpellingListIndex())); + return false; } -static void handleAMDGPUWavesPerEUAttr(Sema &S, Decl *D, const ParsedAttr &AL) { - uint32_t Min = 0; - Expr *MinExpr = AL.getArgAsExpr(0); - if (!checkUInt32Argument(S, AL, MinExpr, Min)) +void Sema::addAMDGPUFlatWorkGroupSizeAttr(SourceRange AttrRange, Decl *D, + Expr *MinExpr, Expr *MaxExpr, + unsigned SpellingListIndex) { + AMDGPUFlatWorkGroupSizeAttr TmpAttr(AttrRange, Context, MinExpr, MaxExpr, + SpellingListIndex); + + if (checkAMDGPUFlatWorkGroupSizeArguments(*this, MinExpr, MaxExpr, TmpAttr)) return; + D->addAttr(::new (Context) AMDGPUFlatWorkGroupSizeAttr( + AttrRange, Context, MinExpr, MaxExpr, SpellingListIndex)); +} + +static void handleAMDGPUFlatWorkGroupSizeAttr(Sema &S, Decl *D, + const ParsedAttr &AL) { + Expr *MinExpr = AL.getArgAsExpr(0); + Expr *MaxExpr = AL.getArgAsExpr(1); + + S.addAMDGPUFlatWorkGroupSizeAttr(AL.getRange(), D, MinExpr, MaxExpr, + AL.getAttributeSpellingListIndex()); +} + +static bool checkAMDGPUWavesPerEUArguments(Sema &S, Expr *MinExpr, + Expr *MaxExpr, + const AMDGPUWavesPerEUAttr &Attr) { + if (S.DiagnoseUnexpandedParameterPack(MinExpr) || + (MaxExpr && S.DiagnoseUnexpandedParameterPack(MaxExpr))) + return true; + + // Accept template arguments for now as they depend on something else. + // We'll get to check them when they eventually get instantiated. + if (MinExpr->isValueDependent() || (MaxExpr && MaxExpr->isValueDependent())) + return false; + + uint32_t Min = 0; + if (!checkUInt32Argument(S, Attr, MinExpr, Min, 0)) + return true; + uint32_t Max = 0; - if (AL.getNumArgs() == 2) { - Expr *MaxExpr = AL.getArgAsExpr(1); - if (!checkUInt32Argument(S, AL, MaxExpr, Max)) - return; - } + if (MaxExpr && !checkUInt32Argument(S, Attr, MaxExpr, Max, 1)) + return true; if (Min == 0 && Max != 0) { - S.Diag(AL.getLoc(), diag::err_attribute_argument_invalid) << AL << 0; - return; + S.Diag(Attr.getLocation(), diag::err_attribute_argument_invalid) + << &Attr << 0; + return true; } if (Max != 0 && Min > Max) { - S.Diag(AL.getLoc(), diag::err_attribute_argument_invalid) << AL << 1; - return; + S.Diag(Attr.getLocation(), diag::err_attribute_argument_invalid) + << &Attr << 1; + return true; } - D->addAttr(::new (S.Context) - AMDGPUWavesPerEUAttr(AL.getLoc(), S.Context, Min, Max, - AL.getAttributeSpellingListIndex())); + return false; +} + +void Sema::addAMDGPUWavesPerEUAttr(SourceRange AttrRange, Decl *D, + Expr *MinExpr, Expr *MaxExpr, + unsigned SpellingListIndex) { + AMDGPUWavesPerEUAttr TmpAttr(AttrRange, Context, MinExpr, MaxExpr, + SpellingListIndex); + + if (checkAMDGPUWavesPerEUArguments(*this, MinExpr, MaxExpr, TmpAttr)) + return; + + D->addAttr(::new (Context) AMDGPUWavesPerEUAttr(AttrRange, Context, MinExpr, + MaxExpr, SpellingListIndex)); +} + +static void handleAMDGPUWavesPerEUAttr(Sema &S, Decl *D, const ParsedAttr &AL) { + if (!checkAttributeAtLeastNumArgs(S, AL, 1) || + !checkAttributeAtMostNumArgs(S, AL, 2)) + return; + + Expr *MinExpr = AL.getArgAsExpr(0); + Expr *MaxExpr = (AL.getNumArgs() > 1) ? AL.getArgAsExpr(1) : nullptr; + + S.addAMDGPUWavesPerEUAttr(AL.getRange(), D, MinExpr, MaxExpr, + AL.getAttributeSpellingListIndex()); } static void handleAMDGPUNumSGPRAttr(Sema &S, Decl *D, const ParsedAttr &AL) { Index: clang/lib/Sema/SemaCUDA.cpp =================================================================== --- clang/lib/Sema/SemaCUDA.cpp +++ clang/lib/Sema/SemaCUDA.cpp @@ -675,9 +675,22 @@ bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) { assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); assert(Callee && "Callee may not be null."); + + auto &ExprEvalCtx = ExprEvalContexts.back(); + if (ExprEvalCtx.isUnevaluated()) + return true; + + FunctionDecl *Caller = nullptr; + if (auto *Template = ExprEvalContexts.back().Template) { + if (auto *FD = dyn_cast<FunctionDecl>(Template->getTemplatedDecl())) + Caller = FD; + } else if (ExprEvalCtx.isConstantEvaluated()) + return true; + // FIXME: Is bailing out early correct here? Should we instead assume that // the caller is a global initializer? - FunctionDecl *Caller = dyn_cast<FunctionDecl>(CurContext); + if (!Caller) + Caller = dyn_cast<FunctionDecl>(CurContext); if (!Caller) return true; Index: clang/lib/CodeGen/TargetInfo.cpp =================================================================== --- clang/lib/CodeGen/TargetInfo.cpp +++ clang/lib/CodeGen/TargetInfo.cpp @@ -7797,8 +7797,16 @@ const auto *FlatWGS = FD->getAttr<AMDGPUFlatWorkGroupSizeAttr>(); if (ReqdWGS || FlatWGS) { - unsigned Min = FlatWGS ? FlatWGS->getMin() : 0; - unsigned Max = FlatWGS ? FlatWGS->getMax() : 0; + unsigned Min = 0; + unsigned Max = 0; + if (FlatWGS) { + Min = FlatWGS->getMin() + ->EvaluateKnownConstInt(M.getContext()) + .getExtValue(); + Max = FlatWGS->getMax() + ->EvaluateKnownConstInt(M.getContext()) + .getExtValue(); + } if (ReqdWGS && Min == 0 && Max == 0) Min = Max = ReqdWGS->getXDim() * ReqdWGS->getYDim() * ReqdWGS->getZDim(); @@ -7812,8 +7820,12 @@ } if (const auto *Attr = FD->getAttr<AMDGPUWavesPerEUAttr>()) { - unsigned Min = Attr->getMin(); - unsigned Max = Attr->getMax(); + unsigned Min = + Attr->getMin()->EvaluateKnownConstInt(M.getContext()).getExtValue(); + unsigned Max = Attr->getMax() ? Attr->getMax() + ->EvaluateKnownConstInt(M.getContext()) + .getExtValue() + : 0; if (Min != 0) { assert((Max == 0 || Min <= Max) && "Min must be less than or equal Max"); Index: clang/include/clang/Sema/Sema.h =================================================================== --- clang/include/clang/Sema/Sema.h +++ clang/include/clang/Sema/Sema.h @@ -1048,6 +1048,10 @@ EK_Decltype, EK_TemplateArgument, EK_Other } ExprContext; + /// If we are checking arguments of a template, this is the template + /// under check. + TemplateDecl *Template; + ExpressionEvaluationContextRecord(ExpressionEvaluationContext Context, unsigned NumCleanupObjects, CleanupInfo ParentCleanup, @@ -1056,7 +1060,7 @@ : Context(Context), ParentCleanup(ParentCleanup), NumCleanupObjects(NumCleanupObjects), NumTypos(0), ManglingContextDecl(ManglingContextDecl), MangleNumbering(), - ExprContext(ExprContext) {} + ExprContext(ExprContext), Template(nullptr) {} /// Retrieve the mangling numbering context, used to consistently /// number constructs like lambdas for mangling. @@ -6537,10 +6541,12 @@ bool CheckTemplateArgument(TemplateTypeParmDecl *Param, TypeSourceInfo *Arg); - ExprResult CheckTemplateArgument(NonTypeTemplateParmDecl *Param, - QualType InstantiatedParamType, Expr *Arg, - TemplateArgument &Converted, - CheckTemplateArgumentKind CTAK = CTAK_Specified); + ExprResult + CheckTemplateArgument(NonTypeTemplateParmDecl *Param, + QualType InstantiatedParamType, Expr *Arg, + TemplateArgument &Converted, + CheckTemplateArgumentKind CTAK = CTAK_Specified, + TemplateDecl *Template = nullptr); bool CheckTemplateTemplateArgument(TemplateParameterList *Params, TemplateArgumentLoc &Arg); @@ -8659,6 +8665,16 @@ void AddXConsumedAttr(Decl *D, SourceRange SR, unsigned SpellingIndex, RetainOwnershipKind K, bool IsTemplateInstantiation); + /// addAMDGPUFlatWorkGroupSizeAttr - Adds an amdgpu_flat_work_group_size + /// attribute to a particular declaration. + void addAMDGPUFlatWorkGroupSizeAttr(SourceRange AttrRange, Decl *D, Expr *Min, + Expr *Max, unsigned SpellingListIndex); + + /// addAMDGPUWavePersEUAttr - Adds an amdgpu_waves_per_eu attribute to a + /// particular declaration. + void addAMDGPUWavesPerEUAttr(SourceRange AttrRange, Decl *D, Expr *Min, + Expr *Max, unsigned SpellingListIndex); + bool checkNSReturnsRetainedReturnType(SourceLocation loc, QualType type); //===--------------------------------------------------------------------===// Index: clang/include/clang/Basic/Attr.td =================================================================== --- clang/include/clang/Basic/Attr.td +++ clang/include/clang/Basic/Attr.td @@ -1484,14 +1484,14 @@ def AMDGPUFlatWorkGroupSize : InheritableAttr { let Spellings = [Clang<"amdgpu_flat_work_group_size", 0>]; - let Args = [UnsignedArgument<"Min">, UnsignedArgument<"Max">]; + let Args = [ExprArgument<"Min">, ExprArgument<"Max">]; let Documentation = [AMDGPUFlatWorkGroupSizeDocs]; let Subjects = SubjectList<[Function], ErrorDiag, "kernel functions">; } def AMDGPUWavesPerEU : InheritableAttr { let Spellings = [Clang<"amdgpu_waves_per_eu", 0>]; - let Args = [UnsignedArgument<"Min">, UnsignedArgument<"Max", 1>]; + let Args = [ExprArgument<"Min">, ExprArgument<"Max", 1>]; let Documentation = [AMDGPUWavesPerEUDocs]; let Subjects = SubjectList<[Function], ErrorDiag, "kernel functions">; } Index: .gitignore =================================================================== --- /dev/null +++ .gitignore @@ -0,0 +1 @@ +build.*/
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits