yaxunl updated this revision to Diff 308519.
yaxunl marked an inline comment as done.
yaxunl added a comment.
extract lambda as a function
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D91088/new/
https://reviews.llvm.org/D91088
Files:
clang/lib/CodeGen/CGExpr.cpp
clang/lib/Sema/SemaExpr.cpp
clang/test/CodeGenCUDA/lambda-reference-var.cu
Index: clang/test/CodeGenCUDA/lambda-reference-var.cu
===================================================================
--- /dev/null
+++ clang/test/CodeGenCUDA/lambda-reference-var.cu
@@ -0,0 +1,126 @@
+// RUN: %clang_cc1 -x hip -emit-llvm -std=c++11 %s -o - \
+// RUN: -triple x86_64-linux-gnu \
+// RUN: | FileCheck -check-prefix=HOST %s
+// RUN: %clang_cc1 -x hip -emit-llvm -std=c++11 %s -o - \
+// RUN: -triple amdgcn-amd-amdhsa -fcuda-is-device \
+// RUN: | FileCheck -check-prefix=DEV %s
+
+#include "Inputs/cuda.h"
+
+// HOST: %[[T1:.*]] = type <{ i32*, i32, [4 x i8] }>
+// HOST: %[[T2:.*]] = type { i32*, i32** }
+// HOST: %[[T3:.*]] = type <{ i32*, i32, [4 x i8] }>
+// DEV: %[[T1:.*]] = type { i32* }
+// DEV: %[[T2:.*]] = type { i32** }
+// DEV: %[[T3:.*]] = type <{ i32*, i32, [4 x i8] }>
+int global_host_var;
+__device__ int global_device_var;
+
+template<class F>
+__global__ void kern(F f) { f(); }
+
+// DEV-LABEL: @_ZZ27dev_capture_dev_ref_by_copyPiENKUlvE_clEv(
+// DEV: %[[VAL:.*]] = load i32, i32* addrspacecast (i32 addrspace(1)* @global_device_var to i32*)
+// DEV: store i32 %[[VAL]]
+__device__ void dev_capture_dev_ref_by_copy(int *out) {
+ int &ref = global_device_var;
+ [=](){ *out = ref;}();
+}
+
+// DEV-LABEL: @_ZZ26dev_capture_dev_ref_by_refPiENKUlvE_clEv(
+// DEV: %[[VAL:.*]] = load i32, i32* addrspacecast (i32 addrspace(1)* @global_device_var to i32*)
+// DEV: %[[VAL2:.*]] = add nsw i32 %[[VAL]], 1
+// DEV: store i32 %[[VAL2]], i32* addrspacecast (i32 addrspace(1)* @global_device_var to i32*)
+// DEV: %[[VAL:.*]] = load i32, i32* addrspacecast (i32 addrspace(1)* @global_device_var to i32*)
+// DEV: store i32 %[[VAL]]
+__device__ void dev_capture_dev_ref_by_ref(int *out) {
+ int &ref = global_device_var;
+ [&](){ ref++; *out = ref;}();
+}
+
+// DEV-LABEL: define void @_Z7dev_refPi(
+// DEV: %[[VAL:.*]] = load i32, i32* addrspacecast (i32 addrspace(1)* @global_device_var to i32*)
+// DEV: %[[VAL2:.*]] = add nsw i32 %[[VAL]], 1
+// DEV: store i32 %[[VAL2]], i32* addrspacecast (i32 addrspace(1)* @global_device_var to i32*)
+// DEV: %[[VAL:.*]] = load i32, i32* addrspacecast (i32 addrspace(1)* @global_device_var to i32*)
+// DEV: store i32 %[[VAL]]
+__device__ void dev_ref(int *out) {
+ int &ref = global_device_var;
+ ref++;
+ *out = ref;
+}
+
+// DEV-LABEL: @_ZZ14dev_lambda_refPiENKUlvE_clEv(
+// DEV: %[[VAL:.*]] = load i32, i32* addrspacecast (i32 addrspace(1)* @global_device_var to i32*)
+// DEV: %[[VAL2:.*]] = add nsw i32 %[[VAL]], 1
+// DEV: store i32 %[[VAL2]], i32* addrspacecast (i32 addrspace(1)* @global_device_var to i32*)
+// DEV: %[[VAL:.*]] = load i32, i32* addrspacecast (i32 addrspace(1)* @global_device_var to i32*)
+// DEV: store i32 %[[VAL]]
+__device__ void dev_lambda_ref(int *out) {
+ [=](){
+ int &ref = global_device_var;
+ ref++;
+ *out = ref;
+ }();
+}
+
+// HOST-LABEL: @_ZZ29host_capture_host_ref_by_copyPiENKUlvE_clEv(
+// HOST: %[[VAL:.*]] = load i32, i32* @global_host_var
+// HOST: store i32 %[[VAL]]
+void host_capture_host_ref_by_copy(int *out) {
+ int &ref = global_host_var;
+ [=](){ *out = ref;}();
+}
+
+// HOST-LABEL: @_ZZ28host_capture_host_ref_by_refPiENKUlvE_clEv(
+// HOST: %[[CAP:.*]] = getelementptr inbounds %[[T2]], %[[T2]]* %this1, i32 0, i32 0
+// HOST: %[[REF:.*]] = load i32*, i32** %[[CAP]]
+// HOST: %[[VAL:.*]] = load i32, i32* %[[REF]]
+// HOST: %[[VAL2:.*]] = add nsw i32 %[[VAL]], 1
+// HOST: store i32 %[[VAL2]], i32* %[[REF]]
+// HOST: %[[VAL:.*]] = load i32, i32* @global_host_var
+// HOST: store i32 %[[VAL]]
+void host_capture_host_ref_by_ref(int *out) {
+ int &ref = global_host_var;
+ [&](){ ref++; *out = ref;}();
+}
+
+// HOST-LABEL: define void @_Z8host_refPi(
+// HOST: %[[VAL:.*]] = load i32, i32* @global_host_var
+// HOST: %[[VAL2:.*]] = add nsw i32 %[[VAL]], 1
+// HOST: store i32 %[[VAL2]], i32* @global_host_var
+// HOST: %[[VAL:.*]] = load i32, i32* @global_host_var
+// HOST: store i32 %[[VAL]]
+void host_ref(int *out) {
+ int &ref = global_host_var;
+ ref++;
+ *out = ref;
+}
+
+// HOST-LABEL: @_ZZ15host_lambda_refPiENKUlvE_clEv(
+// HOST: %[[VAL:.*]] = load i32, i32* @global_host_var
+// HOST: %[[VAL2:.*]] = add nsw i32 %[[VAL]], 1
+// HOST: store i32 %[[VAL2]], i32* @global_host_var
+// HOST: %[[VAL:.*]] = load i32, i32* @global_host_var
+// HOST: store i32 %[[VAL]]
+void host_lambda_ref(int *out) {
+ [=](){
+ int &ref = global_host_var;
+ ref++;
+ *out = ref;
+ }();
+}
+
+// HOST-LABEL: define void @_Z28dev_capture_host_ref_by_copyPi(
+// HOST: %[[CAP:.*]] = getelementptr inbounds %[[T3]], %[[T3]]* %{{.*}}, i32 0, i32 1
+// HOST: %[[VAL:.*]] = load i32, i32* @global_host_var
+// HOST: store i32 %[[VAL]], i32* %[[CAP]]
+// DEV-LABEL: define internal void @_ZZ28dev_capture_host_ref_by_copyPiENKUlvE_clEv(
+// DEV: %[[CAP:.*]] = getelementptr inbounds %[[T3]], %[[T3]]* %this1, i32 0, i32 1
+// DEV: %[[VAL:.*]] = load i32, i32* %[[CAP]]
+// DEV: store i32 %[[VAL]]
+void dev_capture_host_ref_by_copy(int *out) {
+ int &ref = global_host_var;
+ kern<<<1, 1>>>([=]__device__() { *out = ref;});
+}
+
Index: clang/lib/Sema/SemaExpr.cpp
===================================================================
--- clang/lib/Sema/SemaExpr.cpp
+++ clang/lib/Sema/SemaExpr.cpp
@@ -1952,6 +1952,35 @@
TemplateArgs);
}
+// CUDA/HIP: Check whether a captured reference variable is referencing a
+// host variable in a device or host device lambda.
+static bool isCapturingReferenceToHostVarInCUDADeviceLambda(Sema &S,
+ VarDecl *VD) {
+ if (!S.getLangOpts().CUDA || !VD->hasInit())
+ return false;
+ assert(VD->getType()->isReferenceType());
+
+ // Check whether the reference variable is referencing a host variable.
+ auto *DRE = dyn_cast<DeclRefExpr>(VD->getInit());
+ if (!DRE)
+ return false;
+ auto *Referee = dyn_cast<VarDecl>(DRE->getDecl());
+ if (!Referee || !Referee->hasGlobalStorage() ||
+ Referee->hasAttr<CUDADeviceAttr>())
+ return false;
+
+ // Check whether the current function is a device or host device lambda.
+ // Check whether the reference variable is a capture by getDeclContext()
+ // since refersToEnclosingVariableOrCapture() is not ready at this point.
+ auto *MD = dyn_cast_or_null<CXXMethodDecl>(S.CurContext);
+ if (MD && MD->getParent()->isLambda() &&
+ MD->getOverloadedOperator() == OO_Call && MD->hasAttr<CUDADeviceAttr>() &&
+ VD->getDeclContext() != MD)
+ return true;
+
+ return false;
+}
+
NonOdrUseReason Sema::getNonOdrUseReasonInCurrentContext(ValueDecl *D) {
// A declaration named in an unevaluated operand never constitutes an odr-use.
if (isUnevaluatedContext())
@@ -1961,9 +1990,16 @@
// A variable x whose name appears as a potentially-evaluated expression e
// is odr-used by e unless [...] x is a reference that is usable in
// constant expressions.
+ // CUDA/HIP:
+ // If a reference variable referencing a host variable is captured in a
+ // device or host device lambda, the value of the referee must be copied
+ // to the capture and the reference variable must be treated as odr-use
+ // since the value of the referee is not known at compile time and must
+ // be loaded from the captured.
if (VarDecl *VD = dyn_cast<VarDecl>(D)) {
if (VD->getType()->isReferenceType() &&
!(getLangOpts().OpenMP && isOpenMPCapturedDecl(D)) &&
+ !isCapturingReferenceToHostVarInCUDADeviceLambda(*this, VD) &&
VD->isUsableInConstantExpressions(Context))
return NOUR_Constant;
}
Index: clang/lib/CodeGen/CGExpr.cpp
===================================================================
--- clang/lib/CodeGen/CGExpr.cpp
+++ clang/lib/CodeGen/CGExpr.cpp
@@ -1522,6 +1522,29 @@
if (result.HasSideEffects)
return ConstantEmission();
+ // In CUDA/HIP device compilation, a lambda may capture a reference variable
+ // referencing a global host variable by copy. In this case the lambda should
+ // make a copy of the value of the global host variable. The DRE of the
+ // captured reference variable cannot be emitted as load from the host
+ // global variable as compile time constant, since the host variable is not
+ // accessible on device. The DRE of the captured reference variable has to be
+ // loaded from captures.
+ if (CGM.getLangOpts().CUDAIsDevice &&
+ refExpr->refersToEnclosingVariableOrCapture()) {
+ auto *MD = dyn_cast_or_null<CXXMethodDecl>(CurCodeDecl);
+ if (MD && MD->getParent()->isLambda() &&
+ MD->getOverloadedOperator() == OO_Call) {
+ const APValue::LValueBase &base = result.Val.getLValueBase();
+ if (const ValueDecl *D = base.dyn_cast<const ValueDecl *>()) {
+ if (const VarDecl *VD = dyn_cast<const VarDecl>(D)) {
+ if (!VD->hasAttr<CUDADeviceAttr>()) {
+ return ConstantEmission();
+ }
+ }
+ }
+ }
+ }
+
// Emit as a constant.
auto C = ConstantEmitter(*this).emitAbstract(refExpr->getLocation(),
result.Val, resultType);
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits