yaxunl created this revision. yaxunl added reviewers: tra, rjmccall. Lambda functions do not have names; therefore they do not need host/device attributes for overload resolution. They also have internal linkage and are only emitted if used, so there is no need to use host/device attributes to indicate that they should only be emitted for host or device, since clang can detect whether they are used and emit them accordingly.
Therefore it seems that letting lambda functions have host/device attributes by default should not cause ambiguity or unexpected emission. On the other hand, inferring the host/device attribute of a lambda function from its context is inaccurate, since a lambda function can be defined in a host function, passed to a template kernel as a template argument, and called in that kernel; i.e., in many cases a lambda function defined in a host function is intended to be a device function. This patch lets lambda functions be host/device by default for HIP. This should make lambdas easier to use without unwanted side effects. https://reviews.llvm.org/D78655 Files: clang/lib/Sema/SemaCUDA.cpp clang/test/CodeGenCUDA/lambda.cu Index: clang/test/CodeGenCUDA/lambda.cu =================================================================== --- /dev/null +++ clang/test/CodeGenCUDA/lambda.cu @@ -0,0 +1,26 @@ +// RUN: %clang_cc1 -x hip -emit-llvm -std=c++11 %s -o - \ +// RUN: -triple x86_64-linux-gnu | FileCheck -check-prefix=HOST %s +// RUN: %clang_cc1 -x hip -emit-llvm -std=c++11 %s -o - \ +// RUN: -triple amdgcn-amd-amdhsa -fcuda-is-device \ +// RUN: | FileCheck -check-prefix=DEV %s + +#include "Inputs/cuda.h" + +// HOST: @[[KERN:[0-9]+]] = private unnamed_addr constant [22 x i8] c"_Z1gIZ4mainEUlvE_EvT_\00" +// HOST: define internal void @_Z1hIZ4mainEUlvE_EvT_ +// HOST: define internal void @_Z16__device_stub__gIZ4mainEUlvE_EvT_ +// HOST: @__hipRegisterFunction(i8** %0, i8* bitcast ({{.*}}@[[KERN]] +// HOST-NOT: define{{.*}}@_ZZ4mainENKUlvE_clEv +// DEV: define amdgpu_kernel void @_Z1gIZ4mainEUlvE_EvT_ +// DEV: define internal void @_ZZ4mainENKUlvE_clEv +template<class F> +__global__ void g(F f) { f(); } + +template<class F> +void h(F f) { g<<<1,1>>>(f); } + +__device__ int a; + +int main(void) { + h([&](){ a=1;}); +} Index: clang/lib/Sema/SemaCUDA.cpp =================================================================== --- clang/lib/Sema/SemaCUDA.cpp +++ clang/lib/Sema/SemaCUDA.cpp @@ -718,6 +718,11 @@ 
FunctionDecl *CurFn = dyn_cast<FunctionDecl>(CurContext); if (!CurFn) return; + if (getLangOpts().HIP) { + Method->addAttr(CUDADeviceAttr::CreateImplicit(Context)); + Method->addAttr(CUDAHostAttr::CreateImplicit(Context)); + return; + } CUDAFunctionTarget Target = IdentifyCUDATarget(CurFn); if (Target == CFT_Global || Target == CFT_Device) { Method->addAttr(CUDADeviceAttr::CreateImplicit(Context));
Index: clang/test/CodeGenCUDA/lambda.cu =================================================================== --- /dev/null +++ clang/test/CodeGenCUDA/lambda.cu @@ -0,0 +1,26 @@ +// RUN: %clang_cc1 -x hip -emit-llvm -std=c++11 %s -o - \ +// RUN: -triple x86_64-linux-gnu | FileCheck -check-prefix=HOST %s +// RUN: %clang_cc1 -x hip -emit-llvm -std=c++11 %s -o - \ +// RUN: -triple amdgcn-amd-amdhsa -fcuda-is-device \ +// RUN: | FileCheck -check-prefix=DEV %s + +#include "Inputs/cuda.h" + +// HOST: @[[KERN:[0-9]+]] = private unnamed_addr constant [22 x i8] c"_Z1gIZ4mainEUlvE_EvT_\00" +// HOST: define internal void @_Z1hIZ4mainEUlvE_EvT_ +// HOST: define internal void @_Z16__device_stub__gIZ4mainEUlvE_EvT_ +// HOST: @__hipRegisterFunction(i8** %0, i8* bitcast ({{.*}}@[[KERN]] +// HOST-NOT: define{{.*}}@_ZZ4mainENKUlvE_clEv +// DEV: define amdgpu_kernel void @_Z1gIZ4mainEUlvE_EvT_ +// DEV: define internal void @_ZZ4mainENKUlvE_clEv +template<class F> +__global__ void g(F f) { f(); } + +template<class F> +void h(F f) { g<<<1,1>>>(f); } + +__device__ int a; + +int main(void) { + h([&](){ a=1;}); +} Index: clang/lib/Sema/SemaCUDA.cpp =================================================================== --- clang/lib/Sema/SemaCUDA.cpp +++ clang/lib/Sema/SemaCUDA.cpp @@ -718,6 +718,11 @@ FunctionDecl *CurFn = dyn_cast<FunctionDecl>(CurContext); if (!CurFn) return; + if (getLangOpts().HIP) { + Method->addAttr(CUDADeviceAttr::CreateImplicit(Context)); + Method->addAttr(CUDAHostAttr::CreateImplicit(Context)); + return; + } CUDAFunctionTarget Target = IdentifyCUDATarget(CurFn); if (Target == CFT_Global || Target == CFT_Device) { Method->addAttr(CUDADeviceAttr::CreateImplicit(Context));
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits