https://github.com/sarnex updated https://github.com/llvm/llvm-project/pull/137882
>From 080a9d43ba6544d46c2b36c5dc6a5af421264580 Mon Sep 17 00:00:00 2001 From: "Sarnie, Nick" <nick.sar...@intel.com> Date: Wed, 7 May 2025 12:17:30 -0700 Subject: [PATCH 1/2] [clang] Simplify device kernel attributes Signed-off-by: Sarnie, Nick <nick.sar...@intel.com> --- clang/include/clang/AST/GlobalDecl.h | 2 +- clang/include/clang/Basic/Attr.td | 66 +++++++++++-------- clang/include/clang/Basic/Specifiers.h | 5 +- clang/lib/AST/Decl.cpp | 4 +- clang/lib/AST/ItaniumMangle.cpp | 6 +- clang/lib/AST/MicrosoftMangle.cpp | 3 +- clang/lib/AST/Type.cpp | 8 +-- clang/lib/AST/TypePrinter.cpp | 9 +-- clang/lib/Basic/Targets/AArch64.cpp | 4 +- clang/lib/Basic/Targets/AMDGPU.h | 3 +- clang/lib/Basic/Targets/ARM.cpp | 4 +- clang/lib/Basic/Targets/BPF.h | 2 +- clang/lib/Basic/Targets/Mips.cpp | 2 +- clang/lib/Basic/Targets/SPIR.h | 2 +- clang/lib/Basic/Targets/SystemZ.h | 2 +- clang/lib/Basic/Targets/X86.h | 23 +++++-- clang/lib/CodeGen/CGCall.cpp | 37 +++++++---- clang/lib/CodeGen/CGDebugInfo.cpp | 5 +- clang/lib/CodeGen/CGExpr.cpp | 4 +- clang/lib/CodeGen/CodeGenFunction.cpp | 4 +- clang/lib/CodeGen/CodeGenModule.cpp | 9 +-- clang/lib/CodeGen/TargetInfo.cpp | 2 +- clang/lib/CodeGen/Targets/AMDGPU.cpp | 6 +- clang/lib/CodeGen/Targets/NVPTX.cpp | 4 +- clang/lib/CodeGen/Targets/SPIR.cpp | 2 +- clang/lib/CodeGen/Targets/TCE.cpp | 2 +- clang/lib/Sema/SemaDecl.cpp | 14 ++-- clang/lib/Sema/SemaDeclAttr.cpp | 61 ++++++++++++----- clang/lib/Sema/SemaSYCL.cpp | 2 +- .../lib/Sema/SemaTemplateInstantiateDecl.cpp | 8 +-- clang/lib/Sema/SemaType.cpp | 27 ++++++-- ...a-attribute-supported-attributes-list.test | 1 - clang/tools/libclang/CXType.cpp | 4 +- llvm/include/llvm/BinaryFormat/Dwarf.def | 2 +- .../llvm/DebugInfo/DWARF/DWARFTypePrinter.h | 8 ++- .../preload-implicit-kernargs-debug-info.ll | 2 +- 36 files changed, 213 insertions(+), 136 deletions(-) diff --git a/clang/include/clang/AST/GlobalDecl.h b/clang/include/clang/AST/GlobalDecl.h index baf5371d2682d..97caff0198cb0 100644 --- a/clang/include/clang/AST/GlobalDecl.h +++ b/clang/include/clang/AST/GlobalDecl.h @@ -164,7 +164,7 @@ class GlobalDecl { } static KernelReferenceKind getDefaultKernelReference(const FunctionDecl *D) { - return (D->hasAttr<OpenCLKernelAttr>() || D->getLangOpts().CUDAIsDevice) + return (D->hasAttr<DeviceKernelAttr>() || D->getLangOpts().CUDAIsDevice) ? KernelReferenceKind::Kernel : KernelReferenceKind::Stub; } diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index df7bba094fce6..1ce7f0c1835ac 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -190,8 +190,9 @@ def FunctionPointer : SubsetSubject<DeclBase, "functions pointers">; def OpenCLKernelFunction - : SubsetSubject<Function, [{S->hasAttr<OpenCLKernelAttr>()}], - "kernel functions">; + : SubsetSubject<Function, [{S->getASTContext().getLangOpts().OpenCL && + S->hasAttr<DeviceKernelAttr>()}], + "kernel functions">; // HasFunctionProto is a more strict version of FunctionLike, so it should // never be specified in a Subjects list along with FunctionLike (due to the @@ -1498,12 +1499,6 @@ def CUDAGridConstant : InheritableAttr { let Documentation = [CUDAGridConstantAttrDocs]; } -def NVPTXKernel : InheritableAttr, TargetSpecificAttr<TargetNVPTX> { - let Spellings = [Clang<"nvptx_kernel">]; - let Subjects = SubjectList<[Function]>; - let Documentation = [Undocumented]; -} - def HIPManaged : InheritableAttr { let Spellings = [GNU<"managed">, Declspec<"__managed__">]; let Subjects = SubjectList<[Var]>; @@ -1538,11 +1533,44 @@ def CUDAShared : InheritableAttr { } def : MutualExclusions<[CUDAConstant, CUDAShared, HIPManaged]>; -def SYCLKernel : InheritableAttr { - let Spellings = [Clang<"sycl_kernel">]; - let Subjects = SubjectList<[FunctionTmpl]>; - let LangOpts = [SYCLDevice]; +def DeviceKernel : DeclOrTypeAttr { + let Spellings = [Clang<"device_kernel">, Clang<"sycl_kernel">, + Clang<"nvptx_kernel">, Clang<"amdgpu_kernel">, + CustomKeyword<"__kernel">, CustomKeyword<"kernel">]; + let LangOpts = []; let Documentation = [SYCLKernelDocs]; + let AdditionalMembers = + [{ + inline bool isAMDGPUSpelling() const { + return isAMDGPUSpelling(*this); + } + template<typename T> + static inline bool isAMDGPUSpelling(const T& Attr) { + return Attr.getAttrName()->getName() == "amdgpu_kernel"; + } + inline bool isNVPTXSpelling() const { + return isNVPTXSpelling(*this); + } + template<typename T> + static inline bool isNVPTXSpelling(const T& Attr) { + return Attr.getAttrName()->getName() == "nvptx_kernel"; + } + inline bool isOpenCLSpelling() const { + return isOpenCLSpelling(*this); + } + template<typename T> + static inline bool isOpenCLSpelling(const T& Attr) { + return Attr.getAttrName()->getName() == "kernel" || + Attr.getAttrName()->getName() == "__kernel"; + } + inline bool isSYCLSpelling() const { + return isSYCLSpelling(*this); + } + template<typename T> + static inline bool isSYCLSpelling(const T& Attr) { + return Attr.getAttrName()->getName() == "sycl_kernel"; + } +}]; } def SYCLKernelEntryPoint : InheritableAttr { @@ -1608,15 +1636,6 @@ def Allocating : TypeAttr { let Documentation = [AllocatingDocs]; } -// Similar to CUDA, OpenCL attributes do not receive a [[]] spelling because -// the specification does not expose them with one currently. -def OpenCLKernel : InheritableAttr { - let Spellings = [CustomKeyword<"__kernel">, CustomKeyword<"kernel">]; - let Subjects = SubjectList<[Function], ErrorDiag>; - let Documentation = [Undocumented]; - let SimpleHandler = 1; -} - def OpenCLUnrollHint : StmtAttr { let Spellings = [GNU<"opencl_unroll_hint">]; let Subjects = SubjectList<[ForStmt, CXXForRangeStmt, WhileStmt, DoStmt], @@ -2351,11 +2370,6 @@ def AMDGPUMaxNumWorkGroups : InheritableAttr { let Subjects = SubjectList<[Function], ErrorDiag, "kernel functions">; } -def AMDGPUKernelCall : DeclOrTypeAttr { - let Spellings = [Clang<"amdgpu_kernel">]; - let Documentation = [Undocumented]; -} - def BPFPreserveAccessIndex : InheritableAttr, TargetSpecificAttr<TargetBPF> { let Spellings = [Clang<"preserve_access_index">]; diff --git a/clang/include/clang/Basic/Specifiers.h b/clang/include/clang/Basic/Specifiers.h index 491badcc804e7..698fd9da5ced1 100644 --- a/clang/include/clang/Basic/Specifiers.h +++ b/clang/include/clang/Basic/Specifiers.h @@ -289,14 +289,13 @@ namespace clang { CC_AAPCS_VFP, // __attribute__((pcs("aapcs-vfp"))) CC_IntelOclBicc, // __attribute__((intel_ocl_bicc)) CC_SpirFunction, // default for OpenCL functions on SPIR target - CC_OpenCLKernel, // inferred for OpenCL kernels + CC_DeviceKernel, // __attribute__((device_kernel)) CC_Swift, // __attribute__((swiftcall)) CC_SwiftAsync, // __attribute__((swiftasynccall)) CC_PreserveMost, // __attribute__((preserve_most)) CC_PreserveAll, // __attribute__((preserve_all)) CC_AArch64VectorCall, // __attribute__((aarch64_vector_pcs)) CC_AArch64SVEPCS, // __attribute__((aarch64_sve_pcs)) - CC_AMDGPUKernelCall, // __attribute__((amdgpu_kernel)) CC_M68kRTD, // __attribute__((m68k_rtd)) CC_PreserveNone, // __attribute__((preserve_none)) CC_RISCVVectorCall, // __attribute__((riscv_vector_cc)) @@ -326,7 +325,7 @@ namespace clang { case CC_X86Pascal: case CC_X86VectorCall: case CC_SpirFunction: - case CC_OpenCLKernel: + case CC_DeviceKernel: case CC_Swift: case CC_SwiftAsync: case CC_M68kRTD: diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index 07b4d77bd2ab7..c07e1c4d14ddf 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -3541,7 +3541,7 @@ bool FunctionDecl::isExternC() const { } bool FunctionDecl::isInExternCContext() const { - if (hasAttr<OpenCLKernelAttr>()) + if (hasAttr<DeviceKernelAttr>() && getASTContext().getLangOpts().OpenCL) return true; return getLexicalDeclContext()->isExternCContext(); } @@ -5512,7 +5512,7 @@ FunctionDecl *FunctionDecl::CreateDeserialized(ASTContext &C, GlobalDeclID ID) { } bool FunctionDecl::isReferenceableKernel() const { - return hasAttr<CUDAGlobalAttr>() || hasAttr<OpenCLKernelAttr>(); + return hasAttr<CUDAGlobalAttr>() || hasAttr<DeviceKernelAttr>(); } BlockDecl *BlockDecl::Create(ASTContext &C, DeclContext *DC, SourceLocation L) { diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp index 33a8728728574..9e9d5d946e788 100644 --- a/clang/lib/AST/ItaniumMangle.cpp +++ b/clang/lib/AST/ItaniumMangle.cpp @@ -1556,7 +1556,8 @@ void CXXNameMangler::mangleUnqualifiedName( FD && FD->hasAttr<CUDAGlobalAttr>() && GD.getKernelReferenceKind() == KernelReferenceKind::Stub; bool IsOCLDeviceStub = - FD && FD->hasAttr<OpenCLKernelAttr>() && + getASTContext().getLangOpts().OpenCL && FD && + FD->hasAttr<DeviceKernelAttr>() && GD.getKernelReferenceKind() == KernelReferenceKind::Stub; if (IsDeviceStub) mangleDeviceStubName(II); @@ -3529,10 +3530,9 @@ StringRef CXXNameMangler::getCallingConvQualifierName(CallingConv CC) { case CC_AAPCS_VFP: case CC_AArch64VectorCall: case CC_AArch64SVEPCS: - case CC_AMDGPUKernelCall: case CC_IntelOclBicc: case CC_SpirFunction: - case CC_OpenCLKernel: + case CC_DeviceKernel: case CC_PreserveMost: case CC_PreserveAll: case CC_M68kRTD: diff --git a/clang/lib/AST/MicrosoftMangle.cpp b/clang/lib/AST/MicrosoftMangle.cpp index add737b762ccc..36dd3ecd17626 100644 --- a/clang/lib/AST/MicrosoftMangle.cpp +++ b/clang/lib/AST/MicrosoftMangle.cpp @@ -1164,7 +1164,8 @@ void MicrosoftCXXNameMangler::mangleUnqualifiedName(GlobalDecl GD, ->hasAttr<CUDAGlobalAttr>())) && GD.getKernelReferenceKind() == KernelReferenceKind::Stub; bool IsOCLDeviceStub = - ND && isa<FunctionDecl>(ND) && ND->hasAttr<OpenCLKernelAttr>() && + getASTContext().getLangOpts().OpenCL && ND && + isa<FunctionDecl>(ND) && ND->hasAttr<DeviceKernelAttr>() && GD.getKernelReferenceKind() == KernelReferenceKind::Stub; if (IsDeviceStub) mangleSourceName( diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp index 392a95d042353..61b4febf6159c 100644 --- a/clang/lib/AST/Type.cpp +++ b/clang/lib/AST/Type.cpp @@ -3594,14 +3594,12 @@ StringRef FunctionType::getNameForCallConv(CallingConv CC) { return "aarch64_vector_pcs"; case CC_AArch64SVEPCS: return "aarch64_sve_pcs"; - case CC_AMDGPUKernelCall: - return "amdgpu_kernel"; case CC_IntelOclBicc: return "intel_ocl_bicc"; case CC_SpirFunction: return "spir_function"; - case CC_OpenCLKernel: - return "opencl_kernel"; + case CC_DeviceKernel: + return "device_kernel"; case CC_Swift: return "swiftcall"; case CC_SwiftAsync: @@ -4302,7 +4300,7 @@ bool AttributedType::isCallingConv() const { case attr::VectorCall: case attr::AArch64VectorPcs: case attr::AArch64SVEPcs: - case attr::AMDGPUKernelCall: + case attr::DeviceKernel: case attr::Pascal: case attr::MSABI: case attr::SysVABI: diff --git a/clang/lib/AST/TypePrinter.cpp b/clang/lib/AST/TypePrinter.cpp index cba1a2d98d660..1c92abba73905 100644 --- a/clang/lib/AST/TypePrinter.cpp +++ b/clang/lib/AST/TypePrinter.cpp @@ -1096,8 +1096,8 @@ void TypePrinter::printFunctionAfter(const FunctionType::ExtInfo &Info, case CC_AArch64SVEPCS: OS << "__attribute__((aarch64_sve_pcs))"; break; - case CC_AMDGPUKernelCall: - OS << "__attribute__((amdgpu_kernel))"; + case CC_DeviceKernel: + OS << "__attribute__((device_kernel))"; break; case CC_IntelOclBicc: OS << " __attribute__((intel_ocl_bicc))"; @@ -1112,7 +1112,6 @@ void TypePrinter::printFunctionAfter(const FunctionType::ExtInfo &Info, OS << " __attribute__((regcall))"; break; case CC_SpirFunction: - case CC_OpenCLKernel: // Do nothing. These CCs are not available as attributes. break; case CC_Swift: @@ -2065,7 +2064,9 @@ void TypePrinter::printAttributedAfter(const AttributedType *T, } case attr::AArch64VectorPcs: OS << "aarch64_vector_pcs"; break; case attr::AArch64SVEPcs: OS << "aarch64_sve_pcs"; break; - case attr::AMDGPUKernelCall: OS << "amdgpu_kernel"; break; + case attr::DeviceKernel: + OS << T->getAttr()->getSpelling(); + break; case attr::IntelOclBicc: OS << "inteloclbicc"; break; case attr::PreserveMost: OS << "preserve_most"; diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp index 3633bab6e0df9..ad3c64fa68049 100644 --- a/clang/lib/Basic/Targets/AArch64.cpp +++ b/clang/lib/Basic/Targets/AArch64.cpp @@ -1341,7 +1341,7 @@ AArch64TargetInfo::checkCallingConvention(CallingConv CC) const { case CC_PreserveMost: case CC_PreserveAll: case CC_PreserveNone: - case CC_OpenCLKernel: + case CC_DeviceKernel: case CC_AArch64VectorCall: case CC_AArch64SVEPCS: case CC_Win64: @@ -1699,7 +1699,7 @@ WindowsARM64TargetInfo::checkCallingConvention(CallingConv CC) const { case CC_X86FastCall: return CCCR_Ignore; case CC_C: - case CC_OpenCLKernel: + case CC_DeviceKernel: case CC_PreserveMost: case CC_PreserveAll: case CC_PreserveNone: diff --git a/clang/lib/Basic/Targets/AMDGPU.h b/clang/lib/Basic/Targets/AMDGPU.h index 8ea544ba28b10..509128f3cf070 100644 --- a/clang/lib/Basic/Targets/AMDGPU.h +++ b/clang/lib/Basic/Targets/AMDGPU.h @@ -415,8 +415,7 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUTargetInfo final : public TargetInfo { default: return CCCR_Warning; case CC_C: - case CC_OpenCLKernel: - case CC_AMDGPUKernelCall: + case CC_DeviceKernel: return CCCR_OK; } } diff --git a/clang/lib/Basic/Targets/ARM.cpp b/clang/lib/Basic/Targets/ARM.cpp index ca2c1ffbb0eb7..acf28b7a71454 100644 --- a/clang/lib/Basic/Targets/ARM.cpp +++ b/clang/lib/Basic/Targets/ARM.cpp @@ -1405,7 +1405,7 @@ ARMTargetInfo::checkCallingConvention(CallingConv CC) const { case CC_AAPCS_VFP: case CC_Swift: case CC_SwiftAsync: - case CC_OpenCLKernel: + case CC_DeviceKernel: return CCCR_OK; default: return CCCR_Warning; @@ -1480,7 +1480,7 @@ WindowsARMTargetInfo::checkCallingConvention(CallingConv CC) const { case CC_X86VectorCall: return CCCR_Ignore; case CC_C: - case CC_OpenCLKernel: + case CC_DeviceKernel: case CC_PreserveMost: case CC_PreserveAll: case CC_Swift: diff --git a/clang/lib/Basic/Targets/BPF.h b/clang/lib/Basic/Targets/BPF.h index d1f68b842348e..d9e5cf4d8a92f 100644 --- a/clang/lib/Basic/Targets/BPF.h +++ b/clang/lib/Basic/Targets/BPF.h @@ -94,7 +94,7 @@ class LLVM_LIBRARY_VISIBILITY BPFTargetInfo : public TargetInfo { default: return CCCR_Warning; case CC_C: - case CC_OpenCLKernel: + case CC_DeviceKernel: return CCCR_OK; } } diff --git a/clang/lib/Basic/Targets/Mips.cpp b/clang/lib/Basic/Targets/Mips.cpp index 0bf5a062d3192..971a62b6c4037 100644 --- a/clang/lib/Basic/Targets/Mips.cpp +++ b/clang/lib/Basic/Targets/Mips.cpp @@ -337,7 +337,7 @@ WindowsMipsTargetInfo::checkCallingConvention(CallingConv CC) const { case CC_X86VectorCall: return CCCR_Ignore; case CC_C: - case CC_OpenCLKernel: + case CC_DeviceKernel: case CC_PreserveMost: case CC_PreserveAll: case CC_Swift: diff --git a/clang/lib/Basic/Targets/SPIR.h b/clang/lib/Basic/Targets/SPIR.h index bf249e271a870..1521b3e9eada3 100644 --- a/clang/lib/Basic/Targets/SPIR.h +++ b/clang/lib/Basic/Targets/SPIR.h @@ -191,7 +191,7 @@ class LLVM_LIBRARY_VISIBILITY BaseSPIRTargetInfo : public TargetInfo { } CallingConvCheckResult checkCallingConvention(CallingConv CC) const override { - return (CC == CC_SpirFunction || CC == CC_OpenCLKernel) ? CCCR_OK + return (CC == CC_SpirFunction || CC == CC_DeviceKernel) ? CCCR_OK : CCCR_Warning; } diff --git a/clang/lib/Basic/Targets/SystemZ.h b/clang/lib/Basic/Targets/SystemZ.h index 1f69530c4757f..8a54ca4b75d7b 100644 --- a/clang/lib/Basic/Targets/SystemZ.h +++ b/clang/lib/Basic/Targets/SystemZ.h @@ -244,7 +244,7 @@ class LLVM_LIBRARY_VISIBILITY SystemZTargetInfo : public TargetInfo { switch (CC) { case CC_C: case CC_Swift: - case CC_OpenCLKernel: + case CC_DeviceKernel: return CCCR_OK; case CC_SwiftAsync: return CCCR_Error; diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h index 2f6fb33a7b597..711c5acb5d8b8 100644 --- a/clang/lib/Basic/Targets/X86.h +++ b/clang/lib/Basic/Targets/X86.h @@ -408,10 +408,11 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo { case CC_Swift: case CC_X86Pascal: case CC_IntelOclBicc: - case CC_OpenCLKernel: return CCCR_OK; case CC_SwiftAsync: return CCCR_Error; + case CC_DeviceKernel: + return IsOpenCL ? CCCR_OK : CCCR_Warning; default: return CCCR_Warning; } @@ -439,7 +440,13 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo { uint64_t getPointerAlignV(LangAS AddrSpace) const override { return getPointerWidthV(AddrSpace); } + void adjust(DiagnosticsEngine &Diags, LangOptions &Opts) override { + TargetInfo::adjust(Diags, Opts); + IsOpenCL = Opts.OpenCL; + } +private: + bool IsOpenCL = false; }; // X86-32 generic target @@ -785,8 +792,9 @@ class LLVM_LIBRARY_VISIBILITY X86_64TargetInfo : public X86TargetInfo { case CC_PreserveAll: case CC_PreserveNone: case CC_X86RegCall: - case CC_OpenCLKernel: return CCCR_OK; + case CC_DeviceKernel: + return IsOpenCL ? CCCR_OK : CCCR_Warning; default: return CCCR_Warning; } @@ -817,7 +825,6 @@ class LLVM_LIBRARY_VISIBILITY X86_64TargetInfo : public X86TargetInfo { return X86TargetInfo::validateGlobalRegisterVariable(RegName, RegSize, HasSizeMismatch); } - void setMaxAtomicWidth() override { if (hasFeature("cx16")) MaxAtomicInlineWidth = 128; @@ -829,6 +836,14 @@ class LLVM_LIBRARY_VISIBILITY X86_64TargetInfo : public X86TargetInfo { size_t getMaxBitIntWidth() const override { return llvm::IntegerType::MAX_INT_BITS; } + + void adjust(DiagnosticsEngine &Diags, LangOptions &Opts) override { + TargetInfo::adjust(Diags, Opts); + IsOpenCL = Opts.OpenCL; + } + +private: + bool IsOpenCL = false; }; // x86-64 UEFI target @@ -913,7 +928,7 @@ class LLVM_LIBRARY_VISIBILITY WindowsX86_64TargetInfo case CC_Swift: case CC_SwiftAsync: case CC_X86RegCall: - case CC_OpenCLKernel: + case CC_DeviceKernel: return CCCR_OK; default: return CCCR_Warning; diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 2f1c7699d27c3..6c64193a3777d 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -80,12 +80,19 @@ unsigned CodeGenTypes::ClangCallConvToLLVMCallConv(CallingConv CC) { return llvm::CallingConv::AArch64_VectorCall; case CC_AArch64SVEPCS: return llvm::CallingConv::AArch64_SVE_VectorCall; - case CC_AMDGPUKernelCall: - return llvm::CallingConv::AMDGPU_KERNEL; case CC_SpirFunction: return llvm::CallingConv::SPIR_FUNC; - case CC_OpenCLKernel: - return CGM.getTargetCodeGenInfo().getOpenCLKernelCallingConv(); + case CC_DeviceKernel: { + if (CGM.getLangOpts().OpenCL) + return CGM.getTargetCodeGenInfo().getOpenCLKernelCallingConv(); + if (CGM.getTriple().isSPIROrSPIRV()) + return llvm::CallingConv::SPIR_KERNEL; + if (CGM.getTriple().isAMDGPU()) + return llvm::CallingConv::AMDGPU_KERNEL; + if (CGM.getTriple().isNVPTX()) + return llvm::CallingConv::PTX_Kernel; + llvm_unreachable("Unknown kernel calling convention"); + } case CC_PreserveMost: return llvm::CallingConv::PreserveMost; case CC_PreserveAll: @@ -253,7 +260,8 @@ CodeGenTypes::arrangeFreeFunctionType(CanQual<FunctionProtoType> FTP) { FTP); } -static CallingConv getCallingConventionForDecl(const ObjCMethodDecl *D, +static CallingConv getCallingConventionForDecl(const CodeGenModule &CGM, + const ObjCMethodDecl *D, bool IsWindows) { // Set the appropriate calling convention for the Function. if (D->hasAttr<StdCallAttr>()) @@ -283,8 +291,8 @@ static CallingConv getCallingConventionForDecl(const ObjCMethodDecl *D, if (D->hasAttr<AArch64SVEPcsAttr>()) return CC_AArch64SVEPCS; - if (D->hasAttr<AMDGPUKernelCallAttr>()) - return CC_AMDGPUKernelCall; + if (D->hasAttr<DeviceKernelAttr>()) + return CC_DeviceKernel; if (D->hasAttr<IntelOclBiccAttr>()) return CC_IntelOclBicc; @@ -532,7 +540,7 @@ CodeGenTypes::arrangeFunctionDeclaration(const GlobalDecl GD) { assert(isa<FunctionType>(FTy)); setCUDAKernelCallingConvention(FTy, CGM, FD); - if (FD->hasAttr<OpenCLKernelAttr>() && + if (getContext().getLangOpts().OpenCL && FD->hasAttr<DeviceKernelAttr>() && GD.getKernelReferenceKind() == KernelReferenceKind::Stub) { const FunctionType *FT = FTy->getAs<FunctionType>(); CGM.getTargetCodeGenInfo().setOCLKernelStubCallingConvention(FT); @@ -582,7 +590,8 @@ CodeGenTypes::arrangeObjCMessageSendSignature(const ObjCMethodDecl *MD, FunctionType::ExtInfo einfo; bool IsWindows = getContext().getTargetInfo().getTriple().isOSWindows(); - einfo = einfo.withCallingConv(getCallingConventionForDecl(MD, IsWindows)); + einfo = + einfo.withCallingConv(getCallingConventionForDecl(CGM, MD, IsWindows)); if (getContext().getLangOpts().ObjCAutoRefCount && MD->hasAttr<NSReturnsRetainedAttr>()) @@ -757,7 +766,7 @@ CodeGenTypes::arrangeSYCLKernelCallerDeclaration(QualType resultType, return arrangeLLVMFunctionInfo(GetReturnType(resultType), FnInfoOpts::None, argTypes, - FunctionType::ExtInfo(CC_OpenCLKernel), + FunctionType::ExtInfo(CC_DeviceKernel), /*paramInfos=*/{}, RequiredArgs::All); } @@ -2510,7 +2519,7 @@ void CodeGenModule::ConstructAttributeList(StringRef Name, NumElemsParam); } - if (TargetDecl->hasAttr<OpenCLKernelAttr>() && + if (getLangOpts().OpenCL && TargetDecl->hasAttr<DeviceKernelAttr>() && CallingConv != CallingConv::CC_C && CallingConv != CallingConv::CC_SpirFunction) { // Check CallingConv to avoid adding uniform-work-group-size attribute to @@ -2889,8 +2898,8 @@ void CodeGenModule::ConstructAttributeList(StringRef Name, // > For arguments to a __kernel function declared to be a pointer to a // > data type, the OpenCL compiler can assume that the pointee is always // > appropriately aligned as required by the data type. - if (TargetDecl && TargetDecl->hasAttr<OpenCLKernelAttr>() && - ParamType->isPointerType()) { + if (getLangOpts().OpenCL && TargetDecl && + TargetDecl->hasAttr<DeviceKernelAttr>() && ParamType->isPointerType()) { QualType PTy = ParamType->getPointeeType(); if (!PTy->isIncompleteType() && PTy->isConstantSizeType()) { llvm::Align Alignment = @@ -4598,7 +4607,7 @@ void CodeGenFunction::EmitCallArgs( if (MD) { IsVariadic = MD->isVariadic(); ExplicitCC = getCallingConventionForDecl( - MD, CGM.getTarget().getTriple().isOSWindows()); + CGM, MD, CGM.getTarget().getTriple().isOSWindows()); ArgTypes.assign(MD->param_type_begin() + ParamsToSkip, MD->param_type_end()); } else { diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp index f3ec498d4064b..e6f133bc5c68e 100644 --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -1589,9 +1589,8 @@ static unsigned getDwarfCC(CallingConv CC) { return llvm::dwarf::DW_CC_LLVM_IntelOclBicc; case CC_SpirFunction: return llvm::dwarf::DW_CC_LLVM_SpirFunction; - case CC_OpenCLKernel: - case CC_AMDGPUKernelCall: - return llvm::dwarf::DW_CC_LLVM_OpenCLKernel; + case CC_DeviceKernel: + return llvm::dwarf::DW_CC_LLVM_DeviceKernel; case CC_Swift: return llvm::dwarf::DW_CC_LLVM_Swift; case CC_SwiftAsync: diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 2e01adc51fdf0..7f3c3984b9a9d 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -5813,7 +5813,7 @@ static CGCallee EmitDirectCallee(CodeGenFunction &CGF, GlobalDecl GD) { } static GlobalDecl getGlobalDeclForDirectCall(const FunctionDecl *FD) { - if (FD->hasAttr<OpenCLKernelAttr>()) + if (FD->hasAttr<DeviceKernelAttr>()) return GlobalDecl(FD, KernelReferenceKind::Stub); return GlobalDecl(FD); } @@ -6235,7 +6235,7 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const auto *FnType = cast<FunctionType>(PointeeType); if (const auto *FD = dyn_cast_or_null<FunctionDecl>(TargetDecl); - FD && FD->hasAttr<OpenCLKernelAttr>()) + FD && FD->hasAttr<DeviceKernelAttr>()) CGM.getTargetCodeGenInfo().setOCLKernelStubCallingConvention(FnType); // If we are checking indirect calls and this call is indirect, check that the diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index d773cdd505ff4..9a5824002e8d9 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -621,7 +621,7 @@ CodeGenFunction::getUBSanFunctionTypeHash(QualType Ty) const { void CodeGenFunction::EmitKernelMetadata(const FunctionDecl *FD, llvm::Function *Fn) { - if (!FD->hasAttr<OpenCLKernelAttr>() && !FD->hasAttr<CUDAGlobalAttr>()) + if (!FD->hasAttr<DeviceKernelAttr>() && !FD->hasAttr<CUDAGlobalAttr>()) return; llvm::LLVMContext &Context = getLLVMContext(); @@ -1595,7 +1595,7 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn, // Implicit copy-assignment gets the same special treatment as implicit // copy-constructors. emitImplicitAssignmentOperatorBody(Args); - } else if (FD->hasAttr<OpenCLKernelAttr>() && + } else if (getLangOpts().OpenCL && FD->hasAttr<DeviceKernelAttr>() && GD.getKernelReferenceKind() == KernelReferenceKind::Kernel) { CallArgList CallArgs; for (unsigned i = 0; i < Args.size(); ++i) { diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index c27817604f6ca..7d93911ce92ff 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -1908,7 +1908,8 @@ static std::string getMangledNameImpl(CodeGenModule &CGM, GlobalDecl GD, } else if (FD && FD->hasAttr<CUDAGlobalAttr>() && GD.getKernelReferenceKind() == KernelReferenceKind::Stub) { Out << "__device_stub__" << II->getName(); - } else if (FD && FD->hasAttr<OpenCLKernelAttr>() && + } else if (FD && FD->hasAttr<DeviceKernelAttr>() && + CGM.getLangOpts().OpenCL && GD.getKernelReferenceKind() == KernelReferenceKind::Stub) { Out << "__clang_ocl_kern_imp_" << II->getName(); } else { @@ -3923,7 +3924,7 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) { // Ignore declarations, they will be emitted on their first use. if (const auto *FD = dyn_cast<FunctionDecl>(Global)) { - if (FD->hasAttr<OpenCLKernelAttr>() && FD->doesThisDeclarationHaveABody()) + if (FD->hasAttr<DeviceKernelAttr>() && FD->doesThisDeclarationHaveABody()) addDeferredDeclToEmit(GlobalDecl(FD, KernelReferenceKind::Stub)); // Update deferred annotations with the latest declaration if the function @@ -4893,7 +4894,7 @@ CodeGenModule::GetAddrOfFunction(GlobalDecl GD, llvm::Type *Ty, bool ForVTable, if (!Ty) { const auto *FD = cast<FunctionDecl>(GD.getDecl()); Ty = getTypes().ConvertType(FD->getType()); - if (FD->hasAttr<OpenCLKernelAttr>() && + if (FD->hasAttr<DeviceKernelAttr>() && GD.getKernelReferenceKind() == KernelReferenceKind::Stub) { const CGFunctionInfo &FI = getTypes().arrangeGlobalDeclaration(GD); Ty = getTypes().GetFunctionType(FI); @@ -6177,7 +6178,7 @@ void CodeGenModule::EmitGlobalFunctionDefinition(GlobalDecl GD, (CodeGenOpts.OptimizationLevel == 0) && !D->hasAttr<MinSizeAttr>(); - if (D->hasAttr<OpenCLKernelAttr>()) { + if (getLangOpts().OpenCL && D->hasAttr<DeviceKernelAttr>()) { if (GD.getKernelReferenceKind() == KernelReferenceKind::Stub && !D->hasAttr<NoInlineAttr>() && !Fn->hasFnAttribute(llvm::Attribute::NoInline) && diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp index 981488eb4dc37..d0c8e40ee5a17 100644 --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -191,7 +191,7 @@ llvm::Value *TargetCodeGenInfo::createEnqueuedBlockKernel( auto *F = llvm::Function::Create(FT, llvm::GlobalValue::ExternalLinkage, Name, &CGF.CGM.getModule()); llvm::CallingConv::ID KernelCC = - CGF.getTypes().ClangCallConvToLLVMCallConv(CallingConv::CC_OpenCLKernel); + CGF.getTypes().ClangCallConvToLLVMCallConv(CallingConv::CC_DeviceKernel); F->setCallingConv(KernelCC); llvm::AttrBuilder KernelAttrs(C); diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp index bcf039d9f268a..f5d9e30ec7fbd 100644 --- a/clang/lib/CodeGen/Targets/AMDGPU.cpp +++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp @@ -336,7 +336,7 @@ static bool requiresAMDGPUProtectedVisibility(const Decl *D, return false; return !D->hasAttr<OMPDeclareTargetDeclAttr>() && - (D->hasAttr<OpenCLKernelAttr>() || + (D->hasAttr<DeviceKernelAttr>() || (isa<FunctionDecl>(D) && D->hasAttr<CUDAGlobalAttr>()) || (isa<VarDecl>(D) && (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>() || @@ -349,7 +349,7 @@ void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes( const auto *ReqdWGS = M.getLangOpts().OpenCL ? FD->getAttr<ReqdWorkGroupSizeAttr>() : nullptr; const bool IsOpenCLKernel = - M.getLangOpts().OpenCL && FD->hasAttr<OpenCLKernelAttr>(); + M.getLangOpts().OpenCL && FD->hasAttr<DeviceKernelAttr>(); const bool IsHIPKernel = M.getLangOpts().HIP && FD->hasAttr<CUDAGlobalAttr>(); const auto *FlatWGS = FD->getAttr<AMDGPUFlatWorkGroupSizeAttr>(); @@ -571,7 +571,7 @@ bool AMDGPUTargetCodeGenInfo::shouldEmitDWARFBitFieldSeparators() const { void AMDGPUTargetCodeGenInfo::setCUDAKernelCallingConvention( const FunctionType *&FT) const { FT = getABIInfo().getContext().adjustFunctionType( - FT, FT->getExtInfo().withCallingConv(CC_OpenCLKernel)); + FT, FT->getExtInfo().withCallingConv(CC_DeviceKernel)); } /// Return IR struct type for rtinfo struct in rocm-device-libs used for device diff --git a/clang/lib/CodeGen/Targets/NVPTX.cpp b/clang/lib/CodeGen/Targets/NVPTX.cpp index 25ab28c54b659..139a0094b16fc 100644 --- a/clang/lib/CodeGen/Targets/NVPTX.cpp +++ b/clang/lib/CodeGen/Targets/NVPTX.cpp @@ -263,7 +263,7 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes( if (M.getLangOpts().OpenCL) { // Use OpenCL function attributes to check for kernel functions // By default, all functions are device functions - if (FD->hasAttr<OpenCLKernelAttr>()) { + if (FD->hasAttr<DeviceKernelAttr>()) { // OpenCL __kernel functions get kernel metadata // Create !{<func-ref>, metadata !"kernel", i32 1} node F->setCallingConv(llvm::CallingConv::PTX_Kernel); @@ -292,7 +292,7 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes( } // Attach kernel metadata directly if compiling for NVPTX. - if (FD->hasAttr<NVPTXKernelAttr>()) { + if (FD->hasAttr<DeviceKernelAttr>()) { F->setCallingConv(llvm::CallingConv::PTX_Kernel); } } diff --git a/clang/lib/CodeGen/Targets/SPIR.cpp b/clang/lib/CodeGen/Targets/SPIR.cpp index f35c124f50aa0..9c186a16e787a 100644 --- a/clang/lib/CodeGen/Targets/SPIR.cpp +++ b/clang/lib/CodeGen/Targets/SPIR.cpp @@ -227,7 +227,7 @@ void SPIRVTargetCodeGenInfo::setCUDAKernelCallingConvention( // Convert HIP kernels to SPIR-V kernels. if (getABIInfo().getContext().getLangOpts().HIP) { FT = getABIInfo().getContext().adjustFunctionType( - FT, FT->getExtInfo().withCallingConv(CC_OpenCLKernel)); + FT, FT->getExtInfo().withCallingConv(CC_DeviceKernel)); return; } } diff --git a/clang/lib/CodeGen/Targets/TCE.cpp b/clang/lib/CodeGen/Targets/TCE.cpp index f3685ccd9825a..df49aea49a1e3 100644 --- a/clang/lib/CodeGen/Targets/TCE.cpp +++ b/clang/lib/CodeGen/Targets/TCE.cpp @@ -39,7 +39,7 @@ void TCETargetCodeGenInfo::setTargetAttributes( llvm::Function *F = cast<llvm::Function>(GV); if (M.getLangOpts().OpenCL) { - if (FD->hasAttr<OpenCLKernelAttr>()) { + if (FD->hasAttr<DeviceKernelAttr>()) { // OpenCL C Kernel functions are not subject to inlining F->addFnAttr(llvm::Attribute::NoInline); const ReqdWorkGroupSizeAttr *Attr = FD->getAttr<ReqdWorkGroupSizeAttr>(); diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 6b561d7bfc6e7..2a11c212d090c 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -8782,7 +8782,7 @@ void Sema::CheckVariableDeclarationType(VarDecl *NewVD) { FunctionDecl *FD = getCurFunctionDecl(); // OpenCL v1.1 s6.5.2 and s6.5.3: no local or constant variables // in functions. - if (FD && !FD->hasAttr<OpenCLKernelAttr>()) { + if (FD && !FD->hasAttr<DeviceKernelAttr>()) { if (T.getAddressSpace() == LangAS::opencl_constant) Diag(NewVD->getLocation(), diag::err_opencl_function_variable) << 0 /*non-kernel only*/ << "constant"; @@ -8794,7 +8794,7 @@ void Sema::CheckVariableDeclarationType(VarDecl *NewVD) { } // OpenCL v2.0 s6.5.2 and s6.5.3: local and constant variables must be // in the outermost scope of a kernel function. - if (FD && FD->hasAttr<OpenCLKernelAttr>()) { + if (FD && FD->hasAttr<DeviceKernelAttr>()) { if (!getCurScope()->isFunctionScope()) { if (T.getAddressSpace() == LangAS::opencl_constant) Diag(NewVD->getLocation(), diag::err_opencl_addrspace_scope) @@ -10927,9 +10927,7 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC, MarkUnusedFileScopedDecl(NewFD); - - - if (getLangOpts().OpenCL && NewFD->hasAttr<OpenCLKernelAttr>()) { + if (getLangOpts().OpenCL && NewFD->hasAttr<DeviceKernelAttr>()) { // OpenCL v1.2 s6.8 static is invalid for kernel functions. if (SC == SC_Static) { Diag(D.getIdentifierLoc(), diag::err_static_kernel); @@ -12434,7 +12432,7 @@ void Sema::CheckMain(FunctionDecl *FD, const DeclSpec &DS) { if (getLangOpts().OpenCL) { Diag(FD->getLocation(), diag::err_opencl_no_main) - << FD->hasAttr<OpenCLKernelAttr>(); + << FD->hasAttr<DeviceKernelAttr>(); FD->setInvalidDecl(); return; } @@ -15696,7 +15694,7 @@ ShouldWarnAboutMissingPrototype(const FunctionDecl *FD, return false; // Don't warn for OpenCL kernels. - if (FD->hasAttr<OpenCLKernelAttr>()) + if (FD->hasAttr<DeviceKernelAttr>()) return false; // Don't warn on explicitly deleted functions. @@ -20580,7 +20578,7 @@ Sema::FunctionEmissionStatus Sema::getEmissionStatus(const FunctionDecl *FD, // SYCL functions can be template, so we check if they have appropriate // attribute prior to checking if it is a template. - if (LangOpts.SYCLIsDevice && FD->hasAttr<SYCLKernelAttr>()) + if (LangOpts.SYCLIsDevice && FD->hasAttr<DeviceKernelAttr>()) return FunctionEmissionStatus::Emitted; // Templates are emitted when they're instantiated. diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index bfb3ee9dcbd16..012017e559324 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -5087,8 +5087,8 @@ static void handleGlobalAttr(Sema &S, Decl *D, const ParsedAttr &AL) { if (FD->isInlineSpecified() && !S.getLangOpts().CUDAIsDevice) S.Diag(FD->getBeginLoc(), diag::warn_kern_is_inline) << FD; - if (AL.getKind() == ParsedAttr::AT_NVPTXKernel) - D->addAttr(::new (S.Context) NVPTXKernelAttr(S.Context, AL)); + if (AL.getKind() == ParsedAttr::AT_DeviceKernel) + D->addAttr(::new (S.Context) DeviceKernelAttr(S.Context, AL)); else D->addAttr(::new (S.Context) CUDAGlobalAttr(S.Context, AL)); // In host compilation the kernel is emitted as a stub function, which is @@ -5223,9 +5223,11 @@ static void handleCallConvAttr(Sema &S, Decl *D, const ParsedAttr &AL) { case ParsedAttr::AT_AArch64SVEPcs: D->addAttr(::new (S.Context) AArch64SVEPcsAttr(S.Context, AL)); return; - case ParsedAttr::AT_AMDGPUKernelCall: - D->addAttr(::new (S.Context) AMDGPUKernelCallAttr(S.Context, AL)); + case ParsedAttr::AT_DeviceKernel: { + // The attribute should already be applied. + assert(D->hasAttr<DeviceKernelAttr>() && "Expected attribute"); return; + } case ParsedAttr::AT_IntelOclBicc: D->addAttr(::new (S.Context) IntelOclBiccAttr(S.Context, AL)); return; @@ -5268,6 +5270,33 @@ static void handleCallConvAttr(Sema &S, Decl *D, const ParsedAttr &AL) { } } +static void handleDeviceKernelAttr(Sema &S, Decl *D, const ParsedAttr &AL) { + const auto *FD = dyn_cast_or_null<FunctionDecl>(D); + bool IsFunctionTemplate = FD && FD->getDescribedFunctionTemplate(); + if (S.getLangOpts().SYCLIsDevice) { + if (!IsFunctionTemplate) { + S.Diag(AL.getLoc(), diag::warn_attribute_wrong_decl_type_str) + << AL << AL.isRegularKeywordAttribute() << "function templates"; + } else { + S.SYCL().handleKernelAttr(D, AL); + } + } else if (DeviceKernelAttr::isSYCLSpelling(AL)) { + S.Diag(AL.getLoc(), diag::warn_attribute_ignored) << AL; + } else if (S.getASTContext().getTargetInfo().getTriple().isNVPTX()) { + handleGlobalAttr(S, D, AL); + } else { + // OpenCL C++ will throw a more specific error. + if (!S.getLangOpts().OpenCLCPlusPlus && (!FD || IsFunctionTemplate)) { + S.Diag(AL.getLoc(), diag::err_attribute_wrong_decl_type_str) + << AL << AL.isRegularKeywordAttribute() << "functions"; + } + handleSimpleAttribute<DeviceKernelAttr>(S, D, AL); + } + // Make sure we validate the CC with the target + // and warn/error if necessary. + handleCallConvAttr(S, D, AL); +} + static void handleSuppressAttr(Sema &S, Decl *D, const ParsedAttr &AL) { if (AL.getAttributeSpellingListIndex() == SuppressAttr::CXX11_gsl_suppress) { // Suppression attribute with GSL spelling requires at least 1 argument. @@ -5429,9 +5458,6 @@ bool Sema::CheckCallingConvAttr(const ParsedAttr &Attrs, CallingConv &CC, case ParsedAttr::AT_AArch64SVEPcs: CC = CC_AArch64SVEPCS; break; - case ParsedAttr::AT_AMDGPUKernelCall: - CC = CC_AMDGPUKernelCall; - break; case ParsedAttr::AT_RegCall: CC = CC_X86RegCall; break; @@ -5503,6 +5529,11 @@ bool Sema::CheckCallingConvAttr(const ParsedAttr &Attrs, CallingConv &CC, llvm::Log2_64(ABIVLen) - 5); break; } + case ParsedAttr::AT_DeviceKernel: { + // Validation was handled in handleDeviceKernelAttr. + CC = CC_DeviceKernel; + break; + } default: llvm_unreachable("unexpected attribute kind"); } @@ -7106,9 +7137,6 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL, case ParsedAttr::AT_EnumExtensibility: handleEnumExtensibilityAttr(S, D, AL); break; - case ParsedAttr::AT_SYCLKernel: - S.SYCL().handleKernelAttr(D, AL); - break; case ParsedAttr::AT_SYCLKernelEntryPoint: S.SYCL().handleKernelEntryPointAttr(D, AL); break; @@ -7133,7 +7161,6 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL, case ParsedAttr::AT_CalledOnce: handleCalledOnceAttr(S, D, AL); break; - case ParsedAttr::AT_NVPTXKernel: case ParsedAttr::AT_CUDAGlobal: handleGlobalAttr(S, D, AL); break; @@ -7397,13 +7424,15 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL, case ParsedAttr::AT_PreserveAll: case ParsedAttr::AT_AArch64VectorPcs: case ParsedAttr::AT_AArch64SVEPcs: - case ParsedAttr::AT_AMDGPUKernelCall: case ParsedAttr::AT_M68kRTD: case ParsedAttr::AT_PreserveNone: case ParsedAttr::AT_RISCVVectorCC: case ParsedAttr::AT_RISCVVLSCC: handleCallConvAttr(S, D, AL); break; + case ParsedAttr::AT_DeviceKernel: + handleDeviceKernelAttr(S, D, AL); + break; case ParsedAttr::AT_Suppress: handleSuppressAttr(S, D, AL); break; @@ -7713,9 +7742,9 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL, static bool isKernelDecl(Decl *D) { const FunctionType *FnTy = D->getFunctionType(); - return D->hasAttr<OpenCLKernelAttr>() || - (FnTy && FnTy->getCallConv() == CallingConv::CC_AMDGPUKernelCall) || - D->hasAttr<CUDAGlobalAttr>() || D->getAttr<NVPTXKernelAttr>(); + return D->hasAttr<DeviceKernelAttr>() || + (FnTy && FnTy->getCallConv() == CallingConv::CC_DeviceKernel) || + D->hasAttr<CUDAGlobalAttr>(); } void Sema::ProcessDeclAttributeList( @@ -7742,7 +7771,7 @@ void Sema::ProcessDeclAttributeList( // good to have a way to specify "these attributes must appear as a group", // for these. Additionally, it would be good to have a way to specify "these // attribute must never appear as a group" for attributes like cold and hot. - if (!(D->hasAttr<OpenCLKernelAttr>() || + if (!(D->hasAttr<DeviceKernelAttr>() || (D->hasAttr<CUDAGlobalAttr>() && Context.getTargetInfo().getTriple().isSPIRV()))) { // These attributes cannot be applied to a non-kernel function. diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index 1969d7b0ba837..3e03cb4bd5f99 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -199,7 +199,7 @@ void SemaSYCL::handleKernelAttr(Decl *D, const ParsedAttr &AL) { return; } - handleSimpleAttribute<SYCLKernelAttr>(*this, D, AL); + handleSimpleAttribute<DeviceKernelAttr>(*this, D, AL); } void SemaSYCL::handleKernelEntryPointAttr(Decl *D, const ParsedAttr &AL) { diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp index 08b3a423d1526..4f21166db34b0 100644 --- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp @@ -668,9 +668,9 @@ static void instantiateDependentAMDGPUMaxNumWorkGroupsAttr( // This doesn't take any template parameters, but we have a custom action that // needs to happen when the kernel itself is instantiated. We need to run the // ItaniumMangler to mark the names required to name this kernel. -static void instantiateDependentSYCLKernelAttr( +static void instantiateDependentDeviceKernelAttr( Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs, - const SYCLKernelAttr &Attr, Decl *New) { + const DeviceKernelAttr &Attr, Decl *New) { New->addAttr(Attr.clone(S.getASTContext())); } @@ -912,8 +912,8 @@ void Sema::InstantiateAttrs(const MultiLevelTemplateArgumentList &TemplateArgs, continue; } - if (auto *A = dyn_cast<SYCLKernelAttr>(TmplAttr)) { - instantiateDependentSYCLKernelAttr(*this, TemplateArgs, *A, New); + if (auto *A = dyn_cast<DeviceKernelAttr>(TmplAttr)) { + instantiateDependentDeviceKernelAttr(*this, TemplateArgs, *A, New); continue; } diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp index 294daef70c339..3c6532e85eee5 100644 --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -134,7 +134,7 @@ static void diagnoseBadTypeAttribute(Sema &S, const ParsedAttr &attr, case ParsedAttr::AT_VectorCall: \ case ParsedAttr::AT_AArch64VectorPcs: \ case ParsedAttr::AT_AArch64SVEPcs: \ - case ParsedAttr::AT_AMDGPUKernelCall: \ + case ParsedAttr::AT_DeviceKernel: \ case ParsedAttr::AT_MSABI: \ case ParsedAttr::AT_SysVABI: \ case ParsedAttr::AT_Pcs: \ @@ -3754,14 +3754,14 @@ static CallingConv getCCForDeclaratorChunk( CallingConv CC = S.Context.getDefaultCallingConvention(FTI.isVariadic, IsCXXInstanceMethod); - // Attribute AT_OpenCLKernel affects the calling convention for SPIR + // Attribute AT_DeviceKernel affects the calling convention for SPIR // and AMDGPU targets, hence it cannot be treated as a calling // convention attribute. This is the simplest place to infer // calling convention for OpenCL kernels. if (S.getLangOpts().OpenCL) { for (const ParsedAttr &AL : D.getDeclSpec().getAttributes()) { - if (AL.getKind() == ParsedAttr::AT_OpenCLKernel) { - CC = CC_OpenCLKernel; + if (AL.getKind() == ParsedAttr::AT_DeviceKernel) { + CC = CC_DeviceKernel; break; } } @@ -3774,7 +3774,7 @@ static CallingConv getCCForDeclaratorChunk( if (Triple.isSPIRV() && Triple.getVendor() != llvm::Triple::AMD) { for (const ParsedAttr &AL : D.getDeclSpec().getAttributes()) { if (AL.getKind() == ParsedAttr::AT_CUDAGlobal) { - CC = CC_OpenCLKernel; + CC = CC_DeviceKernel; break; } } @@ -7530,8 +7530,8 @@ static Attr *getCCTypeAttr(ASTContext &Ctx, ParsedAttr &Attr) { return createSimpleAttr<AArch64SVEPcsAttr>(Ctx, Attr); case ParsedAttr::AT_ArmStreaming: return createSimpleAttr<ArmStreamingAttr>(Ctx, Attr); - case ParsedAttr::AT_AMDGPUKernelCall: - return createSimpleAttr<AMDGPUKernelCallAttr>(Ctx, Attr); + case ParsedAttr::AT_DeviceKernel: + return createSimpleAttr<DeviceKernelAttr>(Ctx, Attr); case ParsedAttr::AT_Pcs: { // The attribute may have had a fixit applied where we treated an // identifier as a string literal. The contents of the string are valid, @@ -8719,6 +8719,16 @@ static void HandleHLSLParamModifierAttr(TypeProcessingState &State, } } +static bool isMultiSubjectAttrAllowedOnType(const ParsedAttr &Attr) { + // The DeviceKernel attribute is shared for many targets, and + // it is only allowed to be a type attribute with the AMDGPU + // spelling, so skip processing the attr as a type attr + // unless it has that spelling. + if (Attr.getKind() != ParsedAttr::AT_DeviceKernel) + return true; + return DeviceKernelAttr::isAMDGPUSpelling(Attr); +} + static void processTypeAttrs(TypeProcessingState &state, QualType &type, TypeAttrLocation TAL, const ParsedAttributesView &attrs, @@ -8974,6 +8984,9 @@ static void processTypeAttrs(TypeProcessingState &state, QualType &type, break; [[fallthrough]]; FUNCTION_TYPE_ATTRS_CASELIST: + if (!isMultiSubjectAttrAllowedOnType(attr)) + break; + attr.setUsedAsTypeAttr(); // Attributes with standard syntax have strict rules for what they diff --git a/clang/test/Misc/pragma-attribute-supported-attributes-list.test b/clang/test/Misc/pragma-attribute-supported-attributes-list.test index 7affacb1a109a..af05deb3a13da 100644 --- a/clang/test/Misc/pragma-attribute-supported-attributes-list.test +++ b/clang/test/Misc/pragma-attribute-supported-attributes-list.test @@ -109,7 +109,6 @@ // CHECK-NEXT: NSConsumed (SubjectMatchRule_variable_is_parameter) // CHECK-NEXT: NSConsumesSelf (SubjectMatchRule_objc_method) // CHECK-NEXT: NSErrorDomain (SubjectMatchRule_enum) -// CHECK-NEXT: NVPTXKernel (SubjectMatchRule_function) // CHECK-NEXT: Naked (SubjectMatchRule_function) // CHECK-NEXT: NoBuiltin (SubjectMatchRule_function) // CHECK-NEXT: NoCommon (SubjectMatchRule_variable) diff --git a/clang/tools/libclang/CXType.cpp b/clang/tools/libclang/CXType.cpp index ffa942d10669c..f5907d74b0339 100644 --- a/clang/tools/libclang/CXType.cpp +++ b/clang/tools/libclang/CXType.cpp @@ -731,8 +731,8 @@ CXCallingConv clang_getFunctionTypeCallingConv(CXType X) { TCALLINGCONV(RISCVVLSCall_32768); TCALLINGCONV(RISCVVLSCall_65536); case CC_SpirFunction: return CXCallingConv_Unexposed; - case CC_AMDGPUKernelCall: return CXCallingConv_Unexposed; - case CC_OpenCLKernel: return CXCallingConv_Unexposed; + case CC_DeviceKernel: + return CXCallingConv_Unexposed; break; } #undef TCALLINGCONV diff --git a/llvm/include/llvm/BinaryFormat/Dwarf.def b/llvm/include/llvm/BinaryFormat/Dwarf.def index e52324a8ebc12..803ed67d534ea 100644 --- a/llvm/include/llvm/BinaryFormat/Dwarf.def +++ b/llvm/include/llvm/BinaryFormat/Dwarf.def @@ -1117,7 +1117,7 @@ HANDLE_DW_CC(0xc3, LLVM_AAPCS) HANDLE_DW_CC(0xc4, LLVM_AAPCS_VFP) HANDLE_DW_CC(0xc5, LLVM_IntelOclBicc) HANDLE_DW_CC(0xc6, LLVM_SpirFunction) -HANDLE_DW_CC(0xc7, LLVM_OpenCLKernel) +HANDLE_DW_CC(0xc7, LLVM_DeviceKernel) HANDLE_DW_CC(0xc8, LLVM_Swift) HANDLE_DW_CC(0xc9, LLVM_PreserveMost) HANDLE_DW_CC(0xca, LLVM_PreserveAll) diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFTypePrinter.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFTypePrinter.h index bd25f6c30ebf1..a760f773055d2 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFTypePrinter.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFTypePrinter.h @@ -734,13 +734,15 @@ void DWARFTypePrinter<DieType>::appendSubroutineNameAfter( OS << " __attribute__((intel_ocl_bicc))"; break; case dwarf::CallingConvention::DW_CC_LLVM_SpirFunction: - case dwarf::CallingConvention::DW_CC_LLVM_OpenCLKernel: - // These aren't available as attributes, but maybe we should still - // render them somehow? (Clang doesn't render them, but that's an issue + // This isn't available as an attribute, but maybe we should still + // render it somehow? (Clang doesn't render it, but that's an issue // for template names too - since then the DWARF names of templates // instantiated with function types with these calling conventions won't // have distinct names - so we'd need to fix that too) break; + case dwarf::CallingConvention::DW_CC_LLVM_DeviceKernel: + OS << " __attribute__((device_kernel))"; + break; case dwarf::CallingConvention::DW_CC_LLVM_Swift: // SwiftAsync missing OS << " __attribute__((swiftcall))"; diff --git a/llvm/test/CodeGen/AMDGPU/preload-implicit-kernargs-debug-info.ll b/llvm/test/CodeGen/AMDGPU/preload-implicit-kernargs-debug-info.ll index 89c9801b5e466..a95f9a0f9c43e 100644 --- a/llvm/test/CodeGen/AMDGPU/preload-implicit-kernargs-debug-info.ll +++ b/llvm/test/CodeGen/AMDGPU/preload-implicit-kernargs-debug-info.ll @@ -28,6 +28,6 @@ attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memo !2 = !{i32 7, !"Dwarf Version", i32 5} !3 = !{i32 2, !"Debug Info Version", i32 3} !4 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 1, type: !5, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0) -!5 = !DISubroutineType(cc: DW_CC_LLVM_OpenCLKernel, types: !6) +!5 = !DISubroutineType(cc: DW_CC_LLVM_DeviceKernel, types: !6) !6 = !{null} !7 = !{i32 1024, i32 1, i32 1} >From 0c752470566e9f74b1d9d1330ecd977674726412 Mon Sep 17 00:00:00 2001 From: "Sarnie, Nick" <nick.sar...@intel.com> Date: Thu, 8 May 2025 12:53:52 -0700 Subject: [PATCH 2/2] apply feedback and rework member functions Signed-off-by: Sarnie, Nick <nick.sar...@intel.com> --- clang/include/clang/Basic/Attr.td | 48 ++++++++++++------------------- 1 file changed, 18 insertions(+), 30 deletions(-) diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 1ce7f0c1835ac..90a7c17e6c07a 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -1537,39 +1537,27 @@ def DeviceKernel : DeclOrTypeAttr { let Spellings = [Clang<"device_kernel">, Clang<"sycl_kernel">, Clang<"nvptx_kernel">, Clang<"amdgpu_kernel">, CustomKeyword<"__kernel">, CustomKeyword<"kernel">]; - let LangOpts = []; let Documentation = [SYCLKernelDocs]; let AdditionalMembers = [{ - inline bool isAMDGPUSpelling() const { - return isAMDGPUSpelling(*this); - } - template<typename T> - static inline bool isAMDGPUSpelling(const T& Attr) { - return Attr.getAttrName()->getName() == "amdgpu_kernel"; - } - inline bool isNVPTXSpelling() const { - return isNVPTXSpelling(*this); - } - template<typename T> - static inline bool isNVPTXSpelling(const T& Attr) { - return Attr.getAttrName()->getName() == "nvptx_kernel"; - } - inline bool isOpenCLSpelling() const { - return isOpenCLSpelling(*this); - } - template<typename T> - static inline bool isOpenCLSpelling(const T& Attr) { - return Attr.getAttrName()->getName() == "kernel" || - Attr.getAttrName()->getName() == "__kernel"; - } - inline bool isSYCLSpelling() const { - return isSYCLSpelling(*this); - } - template<typename T> - static inline bool isSYCLSpelling(const T& Attr) { - return Attr.getAttrName()->getName() == "sycl_kernel"; - } + static inline bool isAMDGPUSpelling(const AttributeCommonInfo& A) { + return A.getAttributeSpellingListIndex() == GNU_amdgpu_kernel || + A.getAttributeSpellingListIndex() == CXX11_clang_amdgpu_kernel || + A.getAttributeSpellingListIndex() == C23_clang_amdgpu_kernel; + } + static inline bool isNVPTXSpelling(const AttributeCommonInfo& A) { + return A.getAttributeSpellingListIndex() == GNU_nvptx_kernel || + A.getAttributeSpellingListIndex() == CXX11_clang_nvptx_kernel || + A.getAttributeSpellingListIndex() == C23_clang_nvptx_kernel; + } + static inline bool isSYCLSpelling(const AttributeCommonInfo& A) { + return A.getAttributeSpellingListIndex() == GNU_sycl_kernel || + A.getAttributeSpellingListIndex() == CXX11_clang_sycl_kernel || + A.getAttributeSpellingListIndex() == C23_clang_sycl_kernel; + } + static inline bool isOpenCLSpelling(const AttributeCommonInfo& A) { + return A.getAttributeSpellingListIndex() == Keyword_kernel; + } }]; } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits