hliao created this revision. hliao added reviewers: tra, yaxunl. Herald added a project: clang. Herald added a subscriber: cfe-commits.
- Clang follows its own scheme for naming lambdas, which do not need to follow the ODR rule. That scheme assigns a unique ID within the TU scope, so the resulting name is not unique or consistent across TUs. - In CUDA/HIP, a lambda with `__device__` or `__host__ __device__` (or an extended lambda) may be used in a `__global__` template function instantiation. If that lambda cannot be named following the ODR rule, the device compilation may produce a device kernel name that mismatches the one from the host compilation, due to the per-TU anonymous type ID assignment described above. - In this patch, a new language option, `-fcuda-force-lambda-odr`, is introduced to force ODR for lambda naming so that all lambdas can be named consistently across TUs, including in the device compilation. This fixes the assertion checking device kernel names and also ensures that name-based resolution can resolve the correct device binaries from the device name generated in the host compilation. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D63164 Files: clang/include/clang/Basic/LangOptions.def clang/include/clang/Driver/CC1Options.td clang/include/clang/Sema/Sema.h clang/lib/Frontend/CompilerInvocation.cpp clang/lib/Sema/SemaLambda.cpp clang/test/CodeGenCUDA/unnamed-types.cu
Index: clang/test/CodeGenCUDA/unnamed-types.cu =================================================================== --- /dev/null +++ clang/test/CodeGenCUDA/unnamed-types.cu @@ -0,0 +1,34 @@ +// RUN: %clang_cc1 -std=c++11 -x hip -triple x86_64-linux-gnu -fcuda-force-lambda-odr -emit-llvm %s -o - | FileCheck %s --check-prefix=HOST +// RUN: %clang_cc1 -std=c++11 -x hip -triple amdgcn-amd-amdhsa -fcuda-force-lambda-odr -fcuda-is-device -emit-llvm %s -o - | FileCheck %s --check-prefix=DEVICE + +#include "Inputs/cuda.h" + +// HOST: @0 = private unnamed_addr constant [43 x i8] c"_Z2k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_\00", align 1 + +__device__ float d0(float x) { + return [](float x) { return x + 2.f; }(x); +} + +__device__ float d1(float x) { + return [](float x) { return x * 2.f; }(x); +} + +// DEVICE: amdgpu_kernel void @_Z2k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_( +template <typename F> +__global__ void k0(float *p, F f) { + p[0] = f(p[0]) + d0(p[1]) + d1(p[2]); +} + +void f0(float *p) { + [](float *p) { + *p = 1.f; + }(p); +} + +void f1(float *p) { + [](float *p) { + k0<<<1,1>>>(p, [] __device__ (float x) { return x + 1.f; }); + }(p); +} +// HOST: @__hip_register_globals +// HOST: __hipRegisterFunction{{.*}}@_Z2k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_{{.*}}@0 Index: clang/lib/Sema/SemaLambda.cpp =================================================================== --- clang/lib/Sema/SemaLambda.cpp +++ clang/lib/Sema/SemaLambda.cpp @@ -272,9 +272,8 @@ return false; } -MangleNumberingContext * -Sema::getCurrentMangleNumberContext(const DeclContext *DC, - Decl *&ManglingContextDecl) { +MangleNumberingContext *Sema::getCurrentMangleNumberContext( + const DeclContext *DC, Decl *&ManglingContextDecl, bool SkpNoODRChk) { // Compute the context for allocating mangling numbers in the current // expression, if the ABI requires them. 
ManglingContextDecl = ExprEvalContexts.back().ManglingContextDecl; @@ -322,7 +321,8 @@ case Normal: { // -- the bodies of non-exported nonspecialized template functions // -- the bodies of inline functions - if ((IsInNonspecializedTemplate && + if (SkpNoODRChk || + (IsInNonspecializedTemplate && !(ManglingContextDecl && isa<ParmVarDecl>(ManglingContextDecl))) || isInInlineFunction(CurContext)) { ManglingContextDecl = nullptr; @@ -337,7 +337,7 @@ case StaticDataMember: // -- the initializers of nonspecialized static members of template classes - if (!IsInNonspecializedTemplate) { + if (!SkpNoODRChk && !IsInNonspecializedTemplate) { ManglingContextDecl = nullptr; return nullptr; } @@ -441,9 +441,9 @@ Class->setLambdaMangling(Mangling->first, Mangling->second); } else { Decl *ManglingContextDecl; - if (MangleNumberingContext *MCtx = - getCurrentMangleNumberContext(Class->getDeclContext(), - ManglingContextDecl)) { + if (MangleNumberingContext *MCtx = getCurrentMangleNumberContext( + Class->getDeclContext(), ManglingContextDecl, + getLangOpts().CUDAForceLambdaODR)) { unsigned ManglingNumber = MCtx->getManglingNumber(Method); Class->setLambdaMangling(ManglingNumber, ManglingContextDecl); } Index: clang/lib/Frontend/CompilerInvocation.cpp =================================================================== --- clang/lib/Frontend/CompilerInvocation.cpp +++ clang/lib/Frontend/CompilerInvocation.cpp @@ -2431,6 +2431,9 @@ if (Args.hasArg(OPT_fno_cuda_host_device_constexpr)) Opts.CUDAHostDeviceConstexpr = 0; + if (Args.hasArg(OPT_fcuda_force_lambda_odr)) + Opts.CUDAForceLambdaODR = 1; + if (Opts.CUDAIsDevice && Args.hasArg(OPT_fcuda_approx_transcendentals)) Opts.CUDADeviceApproxTranscendentals = 1; Index: clang/include/clang/Sema/Sema.h =================================================================== --- clang/include/clang/Sema/Sema.h +++ clang/include/clang/Sema/Sema.h @@ -1090,10 +1090,10 @@ /// block literal. 
/// \param[out] ManglingContextDecl - Returns the ManglingContextDecl /// associated with the context, if relevant. - MangleNumberingContext *getCurrentMangleNumberContext( - const DeclContext *DC, - Decl *&ManglingContextDecl); - + MangleNumberingContext * + getCurrentMangleNumberContext(const DeclContext *DC, + Decl *&ManglingContextDecl, + bool SkpNoODRChk = false); /// SpecialMemberOverloadResult - The overloading result for a special member /// function. Index: clang/include/clang/Driver/CC1Options.td =================================================================== --- clang/include/clang/Driver/CC1Options.td +++ clang/include/clang/Driver/CC1Options.td @@ -863,6 +863,8 @@ HelpText<"Allow variadic functions in CUDA device code.">; def fno_cuda_host_device_constexpr : Flag<["-"], "fno-cuda-host-device-constexpr">, HelpText<"Don't treat unattributed constexpr functions as __host__ __device__.">; +def fcuda_force_lambda_odr : Flag<["-"], "fcuda-force-lambda-odr">, + HelpText<"Force lambda naming following one definition rule (ODR).">; //===----------------------------------------------------------------------===// // OpenMP Options Index: clang/include/clang/Basic/LangOptions.def =================================================================== --- clang/include/clang/Basic/LangOptions.def +++ clang/include/clang/Basic/LangOptions.def @@ -219,6 +219,7 @@ LANGOPT(CUDAHostDeviceConstexpr, 1, 1, "treating unattributed constexpr functions as __host__ __device__") LANGOPT(CUDADeviceApproxTranscendentals, 1, 0, "using approximate transcendental functions") LANGOPT(GPURelocatableDeviceCode, 1, 0, "generate relocatable device code") +LANGOPT(CUDAForceLambdaODR, 1, 0, "force lambda naming following one definition rule (ODR)") LANGOPT(SYCLIsDevice , 1, 0, "Generate code for SYCL device")
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits