gtbercea created this revision.
gtbercea added reviewers: ABataev, carlo.bertolli, grokos, caomhin.
Herald added subscribers: cfe-commits, guansong, jholewinski.

This patch moves the call to the stack init data sharing function before the 
splitting of threads into master and workers. This ensures that the 
initialization happens for all active warp master threads. Test is modified 
appropriately.


Repository:
  rC Clang

https://reviews.llvm.org/D44541

Files:
  lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
  test/OpenMP/nvptx_data_sharing.cpp


Index: test/OpenMP/nvptx_data_sharing.cpp
===================================================================
--- test/OpenMP/nvptx_data_sharing.cpp
+++ test/OpenMP/nvptx_data_sharing.cpp
@@ -30,8 +30,9 @@
 // CK1: {{.*}}define void @__omp_offloading{{.*}}test_ds{{.*}}()
 // CK1: [[SHAREDARGS1:%.+]] = alloca i8**
 // CK1: [[SHAREDARGS2:%.+]] = alloca i8**
-// CK1: call void @__kmpc_kernel_init
 // CK1: call void @__kmpc_data_sharing_init_stack
+// CK1: call void @__omp_offloading{{.*}}_worker()
+// CK1: call void @__kmpc_kernel_init
 // CK1: [[GLOBALSTACK:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i64 8, 
i16 0)
 // CK1: [[GLOBALSTACK2:%.+]] = bitcast i8* [[GLOBALSTACK]] to 
%struct._globalized_locals_ty*
 // CK1: [[A:%.+]] = getelementptr inbounds %struct._globalized_locals_ty, 
%struct._globalized_locals_ty* [[GLOBALSTACK2]], i32 0, i32 0
Index: lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
===================================================================
--- lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -598,6 +598,11 @@
                                                   WorkerFunctionState &WST) {
   CGBuilderTy &Bld = CGF.Builder;
 
+  // For data sharing, initialize the stack for master and workers.
+  CGF.EmitRuntimeCall(
+      createNVPTXRuntimeFunction(
+          OMPRTL_NVPTX__kmpc_data_sharing_init_stack));
+
   llvm::BasicBlock *WorkerBB = CGF.createBasicBlock(".worker");
   llvm::BasicBlock *MasterCheckBB = CGF.createBasicBlock(".mastercheck");
   llvm::BasicBlock *MasterBB = CGF.createBasicBlock(".master");
@@ -626,11 +631,6 @@
   CGF.EmitRuntimeCall(
       createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_init), Args);
 
-  // For data sharing, we need to initialize the stack.
-  CGF.EmitRuntimeCall(
-      createNVPTXRuntimeFunction(
-          OMPRTL_NVPTX__kmpc_data_sharing_init_stack));
-
   emitGenericVarsProlog(CGF, WST.Loc);
 }
 


Index: test/OpenMP/nvptx_data_sharing.cpp
===================================================================
--- test/OpenMP/nvptx_data_sharing.cpp
+++ test/OpenMP/nvptx_data_sharing.cpp
@@ -30,8 +30,9 @@
 // CK1: {{.*}}define void @__omp_offloading{{.*}}test_ds{{.*}}()
 // CK1: [[SHAREDARGS1:%.+]] = alloca i8**
 // CK1: [[SHAREDARGS2:%.+]] = alloca i8**
-// CK1: call void @__kmpc_kernel_init
 // CK1: call void @__kmpc_data_sharing_init_stack
+// CK1: call void @__omp_offloading{{.*}}_worker()
+// CK1: call void @__kmpc_kernel_init
 // CK1: [[GLOBALSTACK:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i64 8, i16 0)
 // CK1: [[GLOBALSTACK2:%.+]] = bitcast i8* [[GLOBALSTACK]] to %struct._globalized_locals_ty*
 // CK1: [[A:%.+]] = getelementptr inbounds %struct._globalized_locals_ty, %struct._globalized_locals_ty* [[GLOBALSTACK2]], i32 0, i32 0
Index: lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
===================================================================
--- lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -598,6 +598,11 @@
                                                   WorkerFunctionState &WST) {
   CGBuilderTy &Bld = CGF.Builder;
 
+  // For data sharing, initialize the stack for master and workers.
+  CGF.EmitRuntimeCall(
+      createNVPTXRuntimeFunction(
+          OMPRTL_NVPTX__kmpc_data_sharing_init_stack));
+
   llvm::BasicBlock *WorkerBB = CGF.createBasicBlock(".worker");
   llvm::BasicBlock *MasterCheckBB = CGF.createBasicBlock(".mastercheck");
   llvm::BasicBlock *MasterBB = CGF.createBasicBlock(".master");
@@ -626,11 +631,6 @@
   CGF.EmitRuntimeCall(
       createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_init), Args);
 
-  // For data sharing, we need to initialize the stack.
-  CGF.EmitRuntimeCall(
-      createNVPTXRuntimeFunction(
-          OMPRTL_NVPTX__kmpc_data_sharing_init_stack));
-
   emitGenericVarsProlog(CGF, WST.Loc);
 }
 
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
  • [PATCH] D44541: [Op... Gheorghe-Teodor Bercea via Phabricator via cfe-commits

Reply via email to