yaxunl updated this revision to Diff 244988. yaxunl added a comment. Herald added a subscriber: kerbowa.
Revised by John's comments. Introduced HIPKernelType for GlobalDecl so that we can use GlobalDecl to represent stub and kernel during host compilation. Revised mangler so that GlobalDecl carrying pertinent information can be passed through. CHANGES SINCE LAST ACTION https://reviews.llvm.org/D68578/new/ https://reviews.llvm.org/D68578 Files: clang/include/clang/AST/GlobalDecl.h clang/include/clang/AST/Mangle.h clang/lib/AST/ItaniumMangle.cpp clang/lib/AST/Mangle.cpp clang/lib/AST/MicrosoftMangle.cpp clang/lib/CodeGen/CGCUDANV.cpp clang/lib/CodeGen/CGCUDARuntime.h clang/lib/CodeGen/CodeGenModule.cpp clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu clang/test/CodeGenCUDA/kernel-stub-name.cu clang/test/CodeGenCUDA/unnamed-types.cu
Index: clang/test/CodeGenCUDA/unnamed-types.cu =================================================================== --- clang/test/CodeGenCUDA/unnamed-types.cu +++ clang/test/CodeGenCUDA/unnamed-types.cu @@ -36,4 +36,4 @@ }(p); } // HOST: @__hip_register_globals -// HOST: __hipRegisterFunction{{.*}}@_Z2k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_{{.*}}@0 +// HOST: __hipRegisterFunction{{.*}}@_Z17__device_stub__k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_{{.*}}@0 Index: clang/test/CodeGenCUDA/kernel-stub-name.cu =================================================================== --- clang/test/CodeGenCUDA/kernel-stub-name.cu +++ clang/test/CodeGenCUDA/kernel-stub-name.cu @@ -6,15 +6,50 @@ #include "Inputs/cuda.h" +extern "C" __global__ void ckernel() {} + +namespace ns { +__global__ void nskernel() {} +} // namespace ns + template<class T> __global__ void kernelfunc() {} +__global__ void kernel_decl(); + +// Device side kernel names + +// CHECK: @[[CKERN:[0-9]*]] = {{.*}} c"ckernel\00" +// CHECK: @[[NSKERN:[0-9]*]] = {{.*}} c"_ZN2ns8nskernelEv\00" +// CHECK: @[[TKERN:[0-9]*]] = {{.*}} c"_Z10kernelfuncIiEvv\00" + +// Non-template kernel stub functions + +// CHECK: define{{.*}}@[[CSTUB:__device_stub__ckernel]] +// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[CSTUB]] +// CHECK: define{{.*}}@[[NSSTUB:_ZN2ns23__device_stub__nskernelEv]] +// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[NSSTUB]] + // CHECK-LABEL: define{{.*}}@_Z8hostfuncv() -// CHECK: call void @[[STUB:_Z10kernelfuncIiEvv.stub]]() -void hostfunc(void) { kernelfunc<int><<<1, 1>>>(); } +// CHECK: call void @[[CSTUB]]() +// CHECK: call void @[[NSSTUB]]() +// CHECK: call void @[[TSTUB:_Z25__device_stub__kernelfuncIiEvv]]() +// CHECK: call void @[[DSTUB:_Z26__device_stub__kernel_declv]]() +void hostfunc(void) { + ckernel<<<1, 1>>>(); + ns::nskernel<<<1, 1>>>(); + kernelfunc<int><<<1, 1>>>(); + kernel_decl<<<1, 1>>>(); +} + +// Template kernel stub functions + +// CHECK: define{{.*}}@[[TSTUB]] +// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[TSTUB]] -// CHECK: define{{.*}}@[[STUB]] -// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[STUB]] +// CHECK: declare{{.*}}@[[DSTUB]] // CHECK-LABEL: define{{.*}}@__hip_register_globals -// CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[STUB]] +// CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[CSTUB]]{{.*}}@[[CKERN]] +// CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[NSSTUB]]{{.*}}@[[NSKERN]] +// CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[TSTUB]]{{.*}}@[[TKERN]] Index: clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu =================================================================== --- clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu +++ clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu @@ -13,19 +13,19 @@ // HOST-NOT: %struct.T.coerce // CHECK: define amdgpu_kernel void @_Z7kernel1Pi(i32 addrspace(1)* %x.coerce) -// HOST: define void @_Z7kernel1Pi.stub(i32* %x) +// HOST: define void @_Z22__device_stub__kernel1Pi(i32* %x) __global__ void kernel1(int *x) { x[0]++; } // CHECK: define amdgpu_kernel void @_Z7kernel2Ri(i32 addrspace(1)* dereferenceable(4) %x.coerce) -// HOST: define void @_Z7kernel2Ri.stub(i32* dereferenceable(4) %x) +// HOST: define void @_Z22__device_stub__kernel2Ri(i32* dereferenceable(4) %x) __global__ void kernel2(int &x) { x++; } // CHECK: define amdgpu_kernel void @_Z7kernel3PU3AS2iPU3AS1i(i32 addrspace(2)* %x, i32 addrspace(1)* %y) -// HOST: define void @_Z7kernel3PU3AS2iPU3AS1i.stub(i32 addrspace(2)* %x, i32 addrspace(1)* %y) +// HOST: define void @_Z22__device_stub__kernel3PU3AS2iPU3AS1i(i32 addrspace(2)* %x, i32 addrspace(1)* %y) __global__ void kernel3(__attribute__((address_space(2))) int *x, __attribute__((address_space(1))) int *y) { y[0] = x[0]; @@ -43,7 +43,7 @@ // `by-val` struct will be coerced into a similar struct with all generic // pointers lowerd into global ones. // CHECK: define amdgpu_kernel void @_Z7kernel41S(%struct.S.coerce %s.coerce) -// HOST: define void @_Z7kernel41S.stub(i32* %s.coerce0, float* %s.coerce1) +// HOST: define void @_Z22__device_stub__kernel41S(i32* %s.coerce0, float* %s.coerce1) __global__ void kernel4(struct S s) { s.x[0]++; s.y[0] += 1.f; @@ -51,7 +51,7 @@ // If a pointer to struct is passed, only the pointer itself is coerced into the global one. // CHECK: define amdgpu_kernel void @_Z7kernel5P1S(%struct.S addrspace(1)* %s.coerce) -// HOST: define void @_Z7kernel5P1S.stub(%struct.S* %s) +// HOST: define void @_Z22__device_stub__kernel5P1S(%struct.S* %s) __global__ void kernel5(struct S *s) { s->x[0]++; s->y[0] += 1.f; @@ -62,7 +62,7 @@ }; // `by-val` array is also coerced. // CHECK: define amdgpu_kernel void @_Z7kernel61T(%struct.T.coerce %t.coerce) -// HOST: define void @_Z7kernel61T.stub(float* %t.coerce0, float* %t.coerce1) +// HOST: define void @_Z22__device_stub__kernel61T(float* %t.coerce0, float* %t.coerce1) __global__ void kernel6(struct T t) { t.x[0][0] += 1.f; t.x[1][0] += 2.f; Index: clang/lib/CodeGen/CodeGenModule.cpp =================================================================== --- clang/lib/CodeGen/CodeGenModule.cpp +++ clang/lib/CodeGen/CodeGenModule.cpp @@ -1025,7 +1025,7 @@ else if (const auto *D = dyn_cast<CXXDestructorDecl>(ND)) MC.mangleCXXDtor(D, GD.getDtorType(), Out); else - MC.mangleName(ND, Out); + MC.mangleName(GD, Out); } else { IdentifierInfo *II = ND->getIdentifier(); assert(II && "Attempt to mangle unnamed decl."); @@ -1035,6 +1035,10 @@ FD->getType()->castAs<FunctionType>()->getCallConv() == CC_X86RegCall) { llvm::raw_svector_ostream Out(Buffer); Out << "__regcall3__" << II->getName(); + } else if (FD && FD->hasAttr<CUDAGlobalAttr>() && + GD.getHIPKernelKind() == HIPKernelKind::Stub) { + llvm::raw_svector_ostream Out(Buffer); + Out << "__device_stub__" << II->getName(); } else { Out << II->getName(); } @@ -1122,11 +1126,23 @@ const auto *ND = cast<NamedDecl>(GD.getDecl()); std::string MangledName = getMangledNameImpl(*this, GD, ND); - // Adjust kernel stub mangling as we may need to be able to differentiate - // them from the kernel itself (e.g., for HIP). - if (auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) - if (!getLangOpts().CUDAIsDevice && FD->hasAttr<CUDAGlobalAttr>()) - MangledName = getCUDARuntime().getDeviceStubName(MangledName); + // Ensure either we have different ABIs between host and device compilations, + // says host compilation following MSVC ABI but device compilation follows + // Itanium C++ ABI or, if they follow the same ABI, kernel names after + // mangling should be the same after name stubbing. The later checking is + // very important as the device kernel name being mangled in host-compilation + // is used to resolve the device binaries to be executed. Inconsistent naming + // result in undefined behavior. Even though we cannot check that naming + // directly between host- and device-compilations, the host- and + // device-mangling in host compilation could help catching certain ones. + assert(!isa<FunctionDecl>(ND) || !ND->hasAttr<CUDAGlobalAttr>() || + getLangOpts().CUDAIsDevice || + (getContext().getAuxTargetInfo() && + (getContext().getAuxTargetInfo()->getCXXABI() != + getContext().getTargetInfo().getCXXABI())) || + getCUDARuntime().getDeviceSideName(cast<FunctionDecl>(ND)) == + getMangledNameImpl( + *this, GD.getWithHIPKernelKind(HIPKernelKind::Kernel), ND)); auto Result = Manglings.insert(std::make_pair(MangledName, GD)); return MangledDeclNames[CanonicalGD] = Result.first->first(); @@ -3331,7 +3347,9 @@ GD.getDtorType() == Dtor_Complete && DD->getParent()->getNumVBases() == 0) GD = GlobalDecl(DD, Dtor_Base); - } + } else if (cast<FunctionDecl>(GD.getDecl())->hasAttr<CUDAGlobalAttr>()) + GD = GD.getWithHIPKernelKind(LangOpts.CUDAIsDevice ? HIPKernelKind::Kernel : HIPKernelKind::Stub); + StringRef MangledName = getMangledName(GD); return GetOrCreateLLVMFunction(MangledName, Ty, GD, ForVTable, DontDefer, @@ -5285,11 +5303,18 @@ switch (D->getKind()) { case Decl::CXXConversion: case Decl::CXXMethod: - case Decl::Function: - EmitGlobal(cast<FunctionDecl>(D)); + case Decl::Function: { + auto *FD = cast<FunctionDecl>(D); + GlobalDecl GD; + if (FD->hasAttr<CUDAGlobalAttr>()) + GD = GlobalDecl(FD, LangOpts.CUDAIsDevice ? HIPKernelKind::Kernel : HIPKernelKind::Stub); + else + GD = GlobalDecl(FD); + EmitGlobal(GD); // Always provide some coverage mapping // even for the functions that aren't emitted. AddDeferredUnusedCoverageMapping(D); + } break; case Decl::CXXDeductionGuide: Index: clang/lib/CodeGen/CGCUDARuntime.h =================================================================== --- clang/lib/CodeGen/CGCUDARuntime.h +++ clang/lib/CodeGen/CGCUDARuntime.h @@ -25,6 +25,7 @@ namespace clang { class CUDAKernelCallExpr; +class Decl; class VarDecl; namespace CodeGen { @@ -65,9 +66,7 @@ /// Returns a module cleanup function or nullptr if it's not needed. /// Must be called after ModuleCtorFunction virtual llvm::Function *makeModuleDtorFunction() = 0; - - /// Construct and return the stub name of a kernel. - virtual std::string getDeviceStubName(llvm::StringRef Name) const = 0; + virtual std::string getDeviceSideName(const Decl *ND) = 0; }; /// Creates an instance of a CUDA runtime class. Index: clang/lib/CodeGen/CGCUDANV.cpp =================================================================== --- clang/lib/CodeGen/CGCUDANV.cpp +++ clang/lib/CodeGen/CGCUDANV.cpp @@ -117,7 +117,7 @@ void emitDeviceStubBodyLegacy(CodeGenFunction &CGF, FunctionArgList &Args); void emitDeviceStubBodyNew(CodeGenFunction &CGF, FunctionArgList &Args); - std::string getDeviceSideName(const Decl *ND); + std::string getDeviceSideName(const Decl *ND) override; public: CGNVCUDARuntime(CodeGenModule &CGM); @@ -132,8 +132,6 @@ llvm::Function *makeModuleCtorFunction() override; /// Creates module destructor function llvm::Function *makeModuleDtorFunction() override; - /// Construct and return the stub name of a kernel. - std::string getDeviceStubName(llvm::StringRef Name) const override; }; } @@ -219,21 +217,6 @@ void CGNVCUDARuntime::emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) { - // Ensure either we have different ABIs between host and device compilations, - // says host compilation following MSVC ABI but device compilation follows - // Itanium C++ ABI or, if they follow the same ABI, kernel names after - // mangling should be the same after name stubbing. The later checking is - // very important as the device kernel name being mangled in host-compilation - // is used to resolve the device binaries to be executed. Inconsistent naming - // result in undefined behavior. Even though we cannot check that naming - // directly between host- and device-compilations, the host- and - // device-mangling in host compilation could help catching certain ones. - assert((CGF.CGM.getContext().getAuxTargetInfo() && - (CGF.CGM.getContext().getAuxTargetInfo()->getCXXABI() != - CGF.CGM.getContext().getTargetInfo().getCXXABI())) || - getDeviceStubName(getDeviceSideName(CGF.CurFuncDecl)) == - CGF.CurFn->getName()); - EmittedKernels.push_back({CGF.CurFn, CGF.CurFuncDecl}); if (CudaFeatureEnabled(CGM.getTarget().getSDKVersion(), CudaFeature::CUDA_USES_NEW_LAUNCH) || @@ -797,12 +780,6 @@ return ModuleDtorFunc; } -std::string CGNVCUDARuntime::getDeviceStubName(llvm::StringRef Name) const { - if (!CGM.getLangOpts().HIP) - return std::string(Name); - return (Name + ".stub").str(); -} - CGCUDARuntime *CodeGen::CreateNVCUDARuntime(CodeGenModule &CGM) { return new CGNVCUDARuntime(CGM); } Index: clang/lib/AST/MicrosoftMangle.cpp =================================================================== --- clang/lib/AST/MicrosoftMangle.cpp +++ clang/lib/AST/MicrosoftMangle.cpp @@ -135,7 +135,7 @@ MicrosoftMangleContextImpl(ASTContext &Context, DiagnosticsEngine &Diags); bool shouldMangleCXXName(const NamedDecl *D) override; bool shouldMangleStringLiteral(const StringLiteral *SL) override; - void mangleCXXName(const NamedDecl *D, raw_ostream &Out) override; + void mangleCXXName(GlobalDecl GD, raw_ostream &Out) override; void mangleVirtualMemPtrThunk(const CXXMethodDecl *MD, const MethodVFTableLocation &ML, raw_ostream &Out) override; @@ -2949,8 +2949,9 @@ << Range; } -void MicrosoftMangleContextImpl::mangleCXXName(const NamedDecl *D, +void MicrosoftMangleContextImpl::mangleCXXName(GlobalDecl GD, raw_ostream &Out) { + const NamedDecl *D = cast<NamedDecl>(GD.getDecl()); assert((isa<FunctionDecl>(D) || isa<VarDecl>(D)) && "Invalid mangleName() call, argument is not a variable or function!"); assert(!isa<CXXConstructorDecl>(D) && !isa<CXXDestructorDecl>(D) && Index: clang/lib/AST/Mangle.cpp =================================================================== --- clang/lib/AST/Mangle.cpp +++ clang/lib/AST/Mangle.cpp @@ -114,7 +114,8 @@ return shouldMangleCXXName(D); } -void MangleContext::mangleName(const NamedDecl *D, raw_ostream &Out) { +void MangleContext::mangleName(GlobalDecl GD, raw_ostream &Out) { + const NamedDecl *D = cast<NamedDecl>(GD.getDecl()); // Any decl can be declared with __asm("foo") on it, and this takes precedence // over all other naming in the .o file. if (const AsmLabelAttr *ALA = D->getAttr<AsmLabelAttr>()) { @@ -149,7 +150,7 @@ if (const ObjCMethodDecl *OMD = dyn_cast<ObjCMethodDecl>(D)) mangleObjCMethodName(OMD, Out); else - mangleCXXName(D, Out); + mangleCXXName(GD, Out); return; } Index: clang/lib/AST/ItaniumMangle.cpp =================================================================== --- clang/lib/AST/ItaniumMangle.cpp +++ clang/lib/AST/ItaniumMangle.cpp @@ -135,7 +135,7 @@ bool shouldMangleStringLiteral(const StringLiteral *) override { return false; } - void mangleCXXName(const NamedDecl *D, raw_ostream &) override; + void mangleCXXName(GlobalDecl GD, raw_ostream &) override; void mangleThunk(const CXXMethodDecl *MD, const ThunkInfo &Thunk, raw_ostream &) override; void mangleCXXDtorThunk(const CXXDestructorDecl *DD, CXXDtorType Type, @@ -417,14 +417,14 @@ void disableDerivedAbiTags() { DisableDerivedAbiTags = true; } static bool shouldHaveAbiTags(ItaniumMangleContextImpl &C, const VarDecl *VD); - void mangle(const NamedDecl *D); + void mangle(GlobalDecl GD); void mangleCallOffset(int64_t NonVirtual, int64_t Virtual); void mangleNumber(const llvm::APSInt &I); void mangleNumber(int64_t Number); void mangleFloat(const llvm::APFloat &F); - void mangleFunctionEncoding(const FunctionDecl *FD); + void mangleFunctionEncoding(GlobalDecl GD); void mangleSeqID(unsigned SeqID); - void mangleName(const NamedDecl *ND); + void mangleName(GlobalDecl GD); void mangleType(QualType T); void mangleNameOrStandardSubstitution(const NamedDecl *ND); void mangleLambdaSig(const CXXRecordDecl *Lambda); @@ -461,38 +461,39 @@ void mangleFunctionEncodingBareType(const FunctionDecl *FD); - void mangleNameWithAbiTags(const NamedDecl *ND, + void mangleNameWithAbiTags(GlobalDecl GD, const AbiTagList *AdditionalAbiTags); void mangleModuleName(const Module *M); void mangleModuleNamePrefix(StringRef Name); void mangleTemplateName(const TemplateDecl *TD, const TemplateArgument *TemplateArgs, unsigned NumTemplateArgs); - void mangleUnqualifiedName(const NamedDecl *ND, + void mangleUnqualifiedName(GlobalDecl GD, const AbiTagList *AdditionalAbiTags) { - mangleUnqualifiedName(ND, ND->getDeclName(), UnknownArity, + mangleUnqualifiedName(GD, cast<NamedDecl>(GD.getDecl())->getDeclName(), UnknownArity, AdditionalAbiTags); } - void mangleUnqualifiedName(const NamedDecl *ND, DeclarationName Name, + void mangleUnqualifiedName(GlobalDecl GD, DeclarationName Name, unsigned KnownArity, const AbiTagList *AdditionalAbiTags); - void mangleUnscopedName(const NamedDecl *ND, + void mangleUnscopedName(GlobalDecl GD, const AbiTagList *AdditionalAbiTags); - void mangleUnscopedTemplateName(const TemplateDecl *ND, + void mangleUnscopedTemplateName(GlobalDecl GD, const AbiTagList *AdditionalAbiTags); void mangleUnscopedTemplateName(TemplateName, const AbiTagList *AdditionalAbiTags); void mangleSourceName(const IdentifierInfo *II); void mangleRegCallName(const IdentifierInfo *II); + void mangleDeviceStubName(const IdentifierInfo *II); void mangleSourceNameWithAbiTags( const NamedDecl *ND, const AbiTagList *AdditionalAbiTags = nullptr); - void mangleLocalName(const Decl *D, + void mangleLocalName(GlobalDecl GD, const AbiTagList *AdditionalAbiTags); void mangleBlockForPrefix(const BlockDecl *Block); void mangleUnqualifiedBlock(const BlockDecl *Block); void mangleTemplateParamDecl(const NamedDecl *Decl); void mangleLambda(const CXXRecordDecl *Lambda); - void mangleNestedName(const NamedDecl *ND, const DeclContext *DC, + void mangleNestedName(GlobalDecl GD, const DeclContext *DC, const AbiTagList *AdditionalAbiTags, bool NoFunction=false); void mangleNestedName(const TemplateDecl *TD, @@ -501,7 +502,7 @@ void manglePrefix(NestedNameSpecifier *qualifier); void manglePrefix(const DeclContext *DC, bool NoFunction=false); void manglePrefix(QualType type); - void mangleTemplatePrefix(const TemplateDecl *ND, bool NoFunction=false); + void mangleTemplatePrefix(GlobalDecl GD, bool NoFunction=false); void mangleTemplatePrefix(TemplateName Template); bool mangleUnresolvedTypeOrSimpleId(QualType DestroyedType, StringRef Prefix = ""); @@ -640,13 +641,14 @@ writeAbiTags(ND, AdditionalAbiTags); } -void CXXNameMangler::mangle(const NamedDecl *D) { +void CXXNameMangler::mangle(GlobalDecl GD) { + const NamedDecl *D = dyn_cast<NamedDecl>(GD.getDecl()); // <mangled-name> ::= _Z <encoding> // ::= <data name> // ::= <special-name> Out << "_Z"; if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) - mangleFunctionEncoding(FD); + mangleFunctionEncoding(GD); else if (const VarDecl *VD = dyn_cast<VarDecl>(D)) mangleName(VD); else if (const IndirectFieldDecl *IFD = dyn_cast<IndirectFieldDecl>(D)) @@ -655,19 +657,20 @@ mangleName(cast<FieldDecl>(D)); } -void CXXNameMangler::mangleFunctionEncoding(const FunctionDecl *FD) { +void CXXNameMangler::mangleFunctionEncoding(GlobalDecl GD) { + const FunctionDecl *FD = cast<FunctionDecl>(GD.getDecl()); // <encoding> ::= <function name> <bare-function-type> // Don't mangle in the type if this isn't a decl we should typically mangle. if (!Context.shouldMangleDeclName(FD)) { - mangleName(FD); + mangleName(GD); return; } AbiTagList ReturnTypeAbiTags = makeFunctionReturnTypeTags(FD); if (ReturnTypeAbiTags.empty()) { // There are no tags for return type, the simplest case. - mangleName(FD); + mangleName(GD); mangleFunctionEncodingBareType(FD); return; } @@ -787,13 +790,14 @@ return isStd(cast<NamespaceDecl>(DC)); } -static const TemplateDecl * -isTemplate(const NamedDecl *ND, const TemplateArgumentList *&TemplateArgs) { +static const GlobalDecl +isTemplate(GlobalDecl GD, const TemplateArgumentList *&TemplateArgs) { + const NamedDecl *ND = cast<NamedDecl>(GD.getDecl()); // Check if we have a function template. if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(ND)) { if (const TemplateDecl *TD = FD->getPrimaryTemplate()) { TemplateArgs = FD->getTemplateSpecializationArgs(); - return TD; + return GD.getWithDecl(TD); } } @@ -801,20 +805,21 @@ if (const ClassTemplateSpecializationDecl *Spec = dyn_cast<ClassTemplateSpecializationDecl>(ND)) { TemplateArgs = &Spec->getTemplateArgs(); - return Spec->getSpecializedTemplate(); + return GD.getWithDecl(Spec->getSpecializedTemplate()); } // Check if we have a variable template. if (const VarTemplateSpecializationDecl *Spec = dyn_cast<VarTemplateSpecializationDecl>(ND)) { TemplateArgs = &Spec->getTemplateArgs(); - return Spec->getSpecializedTemplate(); + return GD.getWithDecl(Spec->getSpecializedTemplate()); } - return nullptr; + return GlobalDecl::getFromOpaquePtr(nullptr); } -void CXXNameMangler::mangleName(const NamedDecl *ND) { +void CXXNameMangler::mangleName(GlobalDecl GD) { + const NamedDecl *ND = cast<NamedDecl>(GD.getDecl()); if (const VarDecl *VD = dyn_cast<VarDecl>(ND)) { // Variables should have implicit tags from its type. AbiTagList VariableTypeAbiTags = makeVariableTypeTags(VD); @@ -843,12 +848,13 @@ // Output name with implicit tags. mangleNameWithAbiTags(VD, &AdditionalAbiTags); } else { - mangleNameWithAbiTags(ND, nullptr); + mangleNameWithAbiTags(GD, nullptr); } } -void CXXNameMangler::mangleNameWithAbiTags(const NamedDecl *ND, +void CXXNameMangler::mangleNameWithAbiTags(GlobalDecl GD, const AbiTagList *AdditionalAbiTags) { + const NamedDecl *ND = cast<NamedDecl>(GD.getDecl()); // <name> ::= [<module-name>] <nested-name> // ::= [<module-name>] <unscoped-name> // ::= [<module-name>] <unscoped-template-name> <template-args> @@ -864,14 +870,14 @@ while (!DC->isNamespace() && !DC->isTranslationUnit()) DC = getEffectiveParentContext(DC); else if (GetLocalClassDecl(ND)) { - mangleLocalName(ND, AdditionalAbiTags); + mangleLocalName(GD, AdditionalAbiTags); return; } DC = IgnoreLinkageSpecDecls(DC); if (isLocalContainerContext(DC)) { - mangleLocalName(ND, AdditionalAbiTags); + mangleLocalName(GD, AdditionalAbiTags); return; } @@ -886,17 +892,17 @@ if (DC->isTranslationUnit() || isStdNamespace(DC)) { // Check if we have a template. const TemplateArgumentList *TemplateArgs = nullptr; - if (const TemplateDecl *TD = isTemplate(ND, TemplateArgs)) { + if (GlobalDecl TD = isTemplate(GD, TemplateArgs)) { mangleUnscopedTemplateName(TD, AdditionalAbiTags); mangleTemplateArgs(*TemplateArgs); return; } - mangleUnscopedName(ND, AdditionalAbiTags); + mangleUnscopedName(GD, AdditionalAbiTags); return; } - mangleNestedName(ND, DC, AdditionalAbiTags); + mangleNestedName(GD, DC, AdditionalAbiTags); } void CXXNameMangler::mangleModuleName(const Module *M) { @@ -947,19 +953,21 @@ } } -void CXXNameMangler::mangleUnscopedName(const NamedDecl *ND, +void CXXNameMangler::mangleUnscopedName(GlobalDecl GD, const AbiTagList *AdditionalAbiTags) { + const NamedDecl *ND = cast<NamedDecl>(GD.getDecl()); // <unscoped-name> ::= <unqualified-name> // ::= St <unqualified-name> # ::std:: if (isStdNamespace(IgnoreLinkageSpecDecls(getEffectiveDeclContext(ND)))) Out << "St"; - mangleUnqualifiedName(ND, AdditionalAbiTags); + mangleUnqualifiedName(GD, AdditionalAbiTags); } void CXXNameMangler::mangleUnscopedTemplateName( - const TemplateDecl *ND, const AbiTagList *AdditionalAbiTags) { + GlobalDecl GD, const AbiTagList *AdditionalAbiTags) { + const TemplateDecl *ND = cast<TemplateDecl>(GD.getDecl()); // <unscoped-template-name> ::= <unscoped-name> // ::= <substitution> if (mangleSubstitution(ND)) @@ -971,9 +979,9 @@ "template template param cannot have abi tags"); mangleTemplateParameter(TTP->getDepth(), TTP->getIndex()); } else if (isa<BuiltinTemplateDecl>(ND) || isa<ConceptDecl>(ND)) { - mangleUnscopedName(ND, AdditionalAbiTags); + mangleUnscopedName(GD, AdditionalAbiTags); } else { - mangleUnscopedName(ND->getTemplatedDecl(), AdditionalAbiTags); + mangleUnscopedName(GD.getWithDecl(ND->getTemplatedDecl()), AdditionalAbiTags); } addSubstitution(ND); @@ -1250,10 +1258,11 @@ mangleTemplateArgs(TemplateArgs, NumTemplateArgs); } -void CXXNameMangler::mangleUnqualifiedName(const NamedDecl *ND, +void CXXNameMangler::mangleUnqualifiedName(GlobalDecl GD, DeclarationName Name, unsigned KnownArity, const AbiTagList *AdditionalAbiTags) { + const NamedDecl *ND = cast_or_null<NamedDecl>(GD.getDecl()); unsigned Arity = KnownArity; // <unqualified-name> ::= <operator-name> // ::= <ctor-dtor-name> @@ -1303,7 +1312,12 @@ bool IsRegCall = FD && FD->getType()->castAs<FunctionType>()->getCallConv() == clang::CC_X86RegCall; - if (IsRegCall) + bool IsDeviceStub = + FD && FD->hasAttr<CUDAGlobalAttr>() && + GD.getHIPKernelKind() == HIPKernelKind::Stub; + if (IsDeviceStub) + mangleDeviceStubName(II); + else if (IsRegCall) mangleRegCallName(II); else mangleSourceName(II); @@ -1492,6 +1506,14 @@ << II->getName(); } +void CXXNameMangler::mangleDeviceStubName(const IdentifierInfo *II) { + // <source-name> ::= <positive length number> __device_stub__ <identifier> + // <number> ::= [n] <non-negative decimal integer> + // <identifier> ::= <unqualified source code identifier> + Out << II->getLength() + sizeof("__device_stub__") - 1 << "__device_stub__" + << II->getName(); +} + void CXXNameMangler::mangleSourceName(const IdentifierInfo *II) { // <source-name> ::= <positive length number> <identifier> // <number> ::= [n] <non-negative decimal integer> @@ -1499,10 +1521,11 @@ Out << II->getLength() << II->getName(); } -void CXXNameMangler::mangleNestedName(const NamedDecl *ND, +void CXXNameMangler::mangleNestedName(GlobalDecl GD, const DeclContext *DC, const AbiTagList *AdditionalAbiTags, bool NoFunction) { + const NamedDecl *ND = cast<NamedDecl>(GD.getDecl()); // <nested-name> // ::= N [<CV-qualifiers>] [<ref-qualifier>] <prefix> <unqualified-name> E // ::= N [<CV-qualifiers>] [<ref-qualifier>] <template-prefix> @@ -1520,13 +1543,13 @@ // Check if we have a template. const TemplateArgumentList *TemplateArgs = nullptr; - if (const TemplateDecl *TD = isTemplate(ND, TemplateArgs)) { + if (GlobalDecl TD = isTemplate(GD, TemplateArgs)) { mangleTemplatePrefix(TD, NoFunction); mangleTemplateArgs(*TemplateArgs); } else { manglePrefix(DC, NoFunction); - mangleUnqualifiedName(ND, AdditionalAbiTags); + mangleUnqualifiedName(GD, AdditionalAbiTags); } Out << 'E'; @@ -1544,8 +1567,9 @@ Out << 'E'; } -void CXXNameMangler::mangleLocalName(const Decl *D, +void CXXNameMangler::mangleLocalName(GlobalDecl GD, const AbiTagList *AdditionalAbiTags) { + const Decl *D = GD.getDecl(); // <local-name> := Z <function encoding> E <entity name> [<discriminator>] // := Z <function encoding> E s [<discriminator>] // <local-name> := Z <function encoding> E d [ <parameter number> ] @@ -1564,8 +1588,17 @@ mangleObjCMethodName(MD); else if (const BlockDecl *BD = dyn_cast<BlockDecl>(DC)) mangleBlockForPrefix(BD); - else - mangleFunctionEncoding(cast<FunctionDecl>(DC)); + else { + GlobalDecl DCGD; + // The ctor and dtor type is not available in the declare context. + if (auto *CD = dyn_cast<CXXConstructorDecl>(DC)) + DCGD = GlobalDecl(CD, Ctor_Complete); + else if (auto *DD = dyn_cast<CXXDestructorDecl>(DC)) + DCGD = GlobalDecl(DD, Dtor_Complete); + else + DCGD = GlobalDecl(dyn_cast<FunctionDecl>(DC)); + mangleFunctionEncoding(DCGD); + } // Implicit ABI tags (from namespace) are not available in the following // entity; reset to actually emitted tags, which are available. @@ -1608,7 +1641,7 @@ mangleUnqualifiedBlock(BD); } else { const NamedDecl *ND = cast<NamedDecl>(D); - mangleNestedName(ND, getEffectiveDeclContext(ND), AdditionalAbiTags, + mangleNestedName(GD, getEffectiveDeclContext(ND), AdditionalAbiTags, true /*NoFunction*/); } } else if (const BlockDecl *BD = dyn_cast<BlockDecl>(D)) { @@ -1629,7 +1662,7 @@ assert(!AdditionalAbiTags && "Block cannot have additional abi tags"); mangleUnqualifiedBlock(BD); } else { - mangleUnqualifiedName(cast<NamedDecl>(D), AdditionalAbiTags); + mangleUnqualifiedName(GD, AdditionalAbiTags); } if (const NamedDecl *ND = dyn_cast<NamedDecl>(RD ? RD : D)) { @@ -1840,7 +1873,7 @@ // Check if we have a template. const TemplateArgumentList *TemplateArgs = nullptr; - if (const TemplateDecl *TD = isTemplate(ND, TemplateArgs)) { + if (GlobalDecl TD = isTemplate(ND, TemplateArgs)) { mangleTemplatePrefix(TD); mangleTemplateArgs(*TemplateArgs); } else { @@ -1863,7 +1896,7 @@ if (OverloadedTemplateStorage *Overloaded = Template.getAsOverloadedTemplate()) { - mangleUnqualifiedName(nullptr, (*Overloaded->begin())->getDeclName(), + mangleUnqualifiedName(static_cast<NamedDecl *>(nullptr), (*Overloaded->begin())->getDeclName(), UnknownArity, nullptr); return; } @@ -1875,8 +1908,9 @@ mangleUnscopedTemplateName(Template, /* AdditionalAbiTags */ nullptr); } -void CXXNameMangler::mangleTemplatePrefix(const TemplateDecl *ND, +void CXXNameMangler::mangleTemplatePrefix(GlobalDecl GD, bool NoFunction) { + const TemplateDecl *ND = cast<TemplateDecl>(GD.getDecl()); // <template-prefix> ::= <prefix> <template unqualified-name> // ::= <template-param> // ::= <substitution> @@ -1892,9 +1926,9 @@ } else { manglePrefix(getEffectiveDeclContext(ND), NoFunction); if (isa<BuiltinTemplateDecl>(ND) || isa<ConceptDecl>(ND)) - mangleUnqualifiedName(ND, nullptr); + mangleUnqualifiedName(GD, nullptr); else - mangleUnqualifiedName(ND->getTemplatedDecl(), nullptr); + mangleUnqualifiedName(GD.getWithDecl(ND->getTemplatedDecl()), nullptr); } addSubstitution(ND); @@ -4943,8 +4977,9 @@ /// and this routine will return false. In this case, the caller should just /// emit the identifier of the declaration (\c D->getIdentifier()) as its /// name. -void ItaniumMangleContextImpl::mangleCXXName(const NamedDecl *D, +void ItaniumMangleContextImpl::mangleCXXName(GlobalDecl GD, raw_ostream &Out) { + const NamedDecl *D = cast<NamedDecl>(GD.getDecl()); assert((isa<FunctionDecl>(D) || isa<VarDecl>(D)) && "Invalid mangleName() call, argument is not a variable or function!"); assert(!isa<CXXConstructorDecl>(D) && !isa<CXXDestructorDecl>(D) && @@ -4955,33 +4990,33 @@ "Mangling declaration"); CXXNameMangler Mangler(*this, Out, D); - Mangler.mangle(D); + Mangler.mangle(GD); } void ItaniumMangleContextImpl::mangleCXXCtor(const CXXConstructorDecl *D, CXXCtorType Type, raw_ostream &Out) { CXXNameMangler Mangler(*this, Out, D, Type); - Mangler.mangle(D); + Mangler.mangle(GlobalDecl(D, Type)); } void ItaniumMangleContextImpl::mangleCXXDtor(const CXXDestructorDecl *D, CXXDtorType Type, raw_ostream &Out) { CXXNameMangler Mangler(*this, Out, D, Type); - Mangler.mangle(D); + Mangler.mangle(GlobalDecl(D, Type)); } void ItaniumMangleContextImpl::mangleCXXCtorComdat(const CXXConstructorDecl *D, raw_ostream &Out) { CXXNameMangler Mangler(*this, Out, D, Ctor_Comdat); - Mangler.mangle(D); + Mangler.mangle(GlobalDecl(D, Ctor_Comdat)); } void ItaniumMangleContextImpl::mangleCXXDtorComdat(const CXXDestructorDecl *D, raw_ostream &Out) { CXXNameMangler Mangler(*this, Out, D, Dtor_Comdat); - Mangler.mangle(D); + Mangler.mangle(GlobalDecl(D, Dtor_Comdat)); } void ItaniumMangleContextImpl::mangleThunk(const CXXMethodDecl *MD, @@ -5025,7 +5060,7 @@ Mangler.mangleCallOffset(ThisAdjustment.NonVirtual, ThisAdjustment.Virtual.Itanium.VCallOffsetOffset); - Mangler.mangleFunctionEncoding(DD); + Mangler.mangleFunctionEncoding(GlobalDecl(DD, Type)); } /// Returns the mangled name for a guard variable for the passed in VarDecl. Index: clang/include/clang/AST/Mangle.h =================================================================== --- clang/include/clang/AST/Mangle.h +++ clang/include/clang/AST/Mangle.h @@ -14,6 +14,7 @@ #define LLVM_CLANG_AST_MANGLE_H #include "clang/AST/Decl.h" +#include "clang/AST/GlobalDecl.h" #include "clang/AST/Type.h" #include "clang/Basic/ABI.h" #include "llvm/ADT/DenseMap.h" @@ -96,8 +97,8 @@ virtual bool shouldMangleStringLiteral(const StringLiteral *SL) = 0; // FIXME: consider replacing raw_ostream & with something like SmallString &. - void mangleName(const NamedDecl *D, raw_ostream &); - virtual void mangleCXXName(const NamedDecl *D, raw_ostream &) = 0; + void mangleName(GlobalDecl GD, raw_ostream &); + virtual void mangleCXXName(GlobalDecl GD, raw_ostream &) = 0; virtual void mangleThunk(const CXXMethodDecl *MD, const ThunkInfo &Thunk, raw_ostream &) = 0; Index: clang/include/clang/AST/GlobalDecl.h =================================================================== --- clang/include/clang/AST/GlobalDecl.h +++ clang/include/clang/AST/GlobalDecl.h @@ -14,6 +14,7 @@ #ifndef LLVM_CLANG_AST_GLOBALDECL_H #define LLVM_CLANG_AST_GLOBALDECL_H +#include "clang/AST/Attr.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/DeclOpenMP.h" @@ -33,6 +34,11 @@ AtExit, }; +enum class HIPKernelKind : unsigned { + Kernel = 0, + Stub = 1, +}; + /// GlobalDecl - represents a global declaration. This can either be a /// CXXConstructorDecl and the constructor type (Base, Complete). /// a CXXDestructorDecl and the destructor type (Base, Complete) or @@ -42,8 +48,8 @@ unsigned MultiVersionIndex = 0; void Init(const Decl *D) { - assert(!isa<CXXConstructorDecl>(D) && "Use other ctor with ctor decls!"); - assert(!isa<CXXDestructorDecl>(D) && "Use other ctor with dtor decls!"); + assert((!D || !isa<CXXConstructorDecl>(D)) && "Use other ctor with ctor decls!"); + assert((!D || !isa<CXXDestructorDecl>(D)) && "Use other ctor with dtor decls!"); Value.setPointer(D); } @@ -55,6 +61,7 @@ : MultiVersionIndex(MVIndex) { Init(D); } + GlobalDecl(const NamedDecl *D) { Init(D); } GlobalDecl(const BlockDecl *D) { Init(D); } GlobalDecl(const CapturedDecl *D) { Init(D); } GlobalDecl(const ObjCMethodDecl *D) { Init(D); } @@ -64,6 +71,10 @@ GlobalDecl(const CXXDestructorDecl *D, CXXDtorType Type) : Value(D, Type) {} GlobalDecl(const VarDecl *D, DynamicInitKind StubKind) : Value(D, unsigned(StubKind)) {} + GlobalDecl(const FunctionDecl *D, HIPKernelKind Kind) + : Value(D, unsigned(Kind)) { + assert(D->hasAttr<CUDAGlobalAttr>() && "Decl is not a HIP kernel!"); + } GlobalDecl getCanonicalDecl() const { GlobalDecl CanonGD; @@ -94,13 +105,22 @@ } unsigned getMultiVersionIndex() const { - assert(isa<FunctionDecl>(getDecl()) && + assert(isa<FunctionDecl>( + getDecl()) && + !cast<FunctionDecl>(getDecl())->hasAttr<CUDAGlobalAttr>() && !isa<CXXConstructorDecl>(getDecl()) && !isa<CXXDestructorDecl>(getDecl()) && "Decl is not a plain FunctionDecl!"); return MultiVersionIndex; } + HIPKernelKind getHIPKernelKind() const { + assert(isa<FunctionDecl>(getDecl()) && + cast<FunctionDecl>(getDecl())->hasAttr<CUDAGlobalAttr>() && + "Decl is not a HIP kernel!"); + return static_cast<HIPKernelKind>(Value.getInt()); + } + friend bool operator==(const GlobalDecl &LHS, const GlobalDecl &RHS) { return LHS.Value == RHS.Value && LHS.MultiVersionIndex == RHS.MultiVersionIndex; @@ -108,6 +128,8 @@ void *getAsOpaquePtr() const { return Value.getOpaqueValue(); } + operator bool() const { return getAsOpaquePtr(); } + static GlobalDecl getFromOpaquePtr(void *P) { GlobalDecl GD; GD.Value.setFromOpaqueValue(P); @@ -135,7 +157,9 @@ } GlobalDecl getWithMultiVersionIndex(unsigned Index) { - assert(isa<FunctionDecl>(getDecl()) && + assert(isa<FunctionDecl>( + getDecl()) && + !cast<FunctionDecl>(getDecl())->hasAttr<CUDAGlobalAttr>() && !isa<CXXConstructorDecl>(getDecl()) && !isa<CXXDestructorDecl>(getDecl()) && "Decl is not a plain FunctionDecl!"); @@ -143,6 +167,15 @@ Result.MultiVersionIndex = Index; return Result; } + + GlobalDecl getWithHIPKernelKind(HIPKernelKind Kind) { + assert(isa<FunctionDecl>(getDecl()) && + cast<FunctionDecl>(getDecl())->hasAttr<CUDAGlobalAttr>() && + "Decl is not a HIP kernel!"); + GlobalDecl Result(*this); + Result.Value.setInt(unsigned(Kind)); + return Result; + } }; } // namespace clang
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits