ABataev created this revision.
ABataev added a reviewer: jdoerfert.
Herald added subscribers: sstefan1, guansong, yaxunl.
Herald added a project: clang.

If the variables must be globalized in OpenMP mode (local automatic
variable, GPU compilation mode, the variable may escape its declaration
context by the reference or by the pointer), it should not be considered
as the NRVO candidate. Otherwise, incorrect the return value of the
function might not be updated.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D80936

Files:
  clang/lib/CodeGen/CGStmt.cpp
  clang/test/OpenMP/nvptx_NRVO_variable.cpp


Index: clang/test/OpenMP/nvptx_NRVO_variable.cpp
===================================================================
--- /dev/null
+++ clang/test/OpenMP/nvptx_NRVO_variable.cpp
@@ -0,0 +1,30 @@
+// Test target codegen - host bc file has to be created first.
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown 
-fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown 
-fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device 
-fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+struct S {
+  int a;
+  S() : a(1) {}
+};
+
+#pragma omp declare target
+void bar(S &);
+// CHECK-LABEL: foo
+S foo() {
+  // CHECK: [[RETVAL:%.+]] = alloca %struct.S,
+  S s;
+  // CHECK: call void @{{.+}}bar{{.+}}(%struct.S* {{.*}}[[S_REF:%.+]])
+  bar(s);
+  // CHECK: [[DEST:%.+]] = bitcast %struct.S* [[RETVAL]] to i8*
+  // CHECK: [[SOURCE:%.+]] = bitcast %struct.S* [[S_REF]] to i8*
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}[[DEST]], i8* 
{{.*}}[[SOURCE]], i64 4, i1 false)
+  // CHECK: [[VAL:%.+]] = load %struct.S, %struct.S* [[RETVAL]],
+  // CHECK: ret %struct.S [[VAL]]
+  return s;
+}
+#pragma omp end declare target
+
+#endif
Index: clang/lib/CodeGen/CGStmt.cpp
===================================================================
--- clang/lib/CodeGen/CGStmt.cpp
+++ clang/lib/CodeGen/CGStmt.cpp
@@ -11,6 +11,7 @@
 
//===----------------------------------------------------------------------===//
 
 #include "CGDebugInfo.h"
+#include "CGOpenMPRuntime.h"
 #include "CodeGenFunction.h"
 #include "CodeGenModule.h"
 #include "TargetInfo.h"
@@ -1106,8 +1107,13 @@
 
   // FIXME: Clean this up by using an LValue for ReturnTemp,
   // EmitStoreThroughLValue, and EmitAnyExpr.
-  if (getLangOpts().ElideConstructors &&
-      S.getNRVOCandidate() && S.getNRVOCandidate()->isNRVOVariable()) {
+  // Check if the NRVO candidate was not globalized in OpenMP mode.
+  if (getLangOpts().ElideConstructors && S.getNRVOCandidate() &&
+      S.getNRVOCandidate()->isNRVOVariable() &&
+      (!getLangOpts().OpenMP ||
+       !CGM.getOpenMPRuntime()
+            .getAddressOfLocalVariable(*this, S.getNRVOCandidate())
+            .isValid())) {
     // Apply the named return value optimization for this return statement,
     // which means doing nothing: the appropriate result has already been
     // constructed into the NRVO variable.


Index: clang/test/OpenMP/nvptx_NRVO_variable.cpp
===================================================================
--- /dev/null
+++ clang/test/OpenMP/nvptx_NRVO_variable.cpp
@@ -0,0 +1,30 @@
+// Test target codegen - host bc file has to be created first.
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+struct S {
+  int a;
+  S() : a(1) {}
+};
+
+#pragma omp declare target
+void bar(S &);
+// CHECK-LABEL: foo
+S foo() {
+  // CHECK: [[RETVAL:%.+]] = alloca %struct.S,
+  S s;
+  // CHECK: call void @{{.+}}bar{{.+}}(%struct.S* {{.*}}[[S_REF:%.+]])
+  bar(s);
+  // CHECK: [[DEST:%.+]] = bitcast %struct.S* [[RETVAL]] to i8*
+  // CHECK: [[SOURCE:%.+]] = bitcast %struct.S* [[S_REF]] to i8*
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}[[DEST]], i8* {{.*}}[[SOURCE]], i64 4, i1 false)
+  // CHECK: [[VAL:%.+]] = load %struct.S, %struct.S* [[RETVAL]],
+  // CHECK: ret %struct.S [[VAL]]
+  return s;
+}
+#pragma omp end declare target
+
+#endif
Index: clang/lib/CodeGen/CGStmt.cpp
===================================================================
--- clang/lib/CodeGen/CGStmt.cpp
+++ clang/lib/CodeGen/CGStmt.cpp
@@ -11,6 +11,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "CGDebugInfo.h"
+#include "CGOpenMPRuntime.h"
 #include "CodeGenFunction.h"
 #include "CodeGenModule.h"
 #include "TargetInfo.h"
@@ -1106,8 +1107,13 @@
 
   // FIXME: Clean this up by using an LValue for ReturnTemp,
   // EmitStoreThroughLValue, and EmitAnyExpr.
-  if (getLangOpts().ElideConstructors &&
-      S.getNRVOCandidate() && S.getNRVOCandidate()->isNRVOVariable()) {
+  // Check if the NRVO candidate was not globalized in OpenMP mode.
+  if (getLangOpts().ElideConstructors && S.getNRVOCandidate() &&
+      S.getNRVOCandidate()->isNRVOVariable() &&
+      (!getLangOpts().OpenMP ||
+       !CGM.getOpenMPRuntime()
+            .getAddressOfLocalVariable(*this, S.getNRVOCandidate())
+            .isValid())) {
     // Apply the named return value optimization for this return statement,
     // which means doing nothing: the appropriate result has already been
     // constructed into the NRVO variable.
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to