gtbercea created this revision. gtbercea added reviewers: hfinkel, Hahnfeld, ABataev, carlo.bertolli, caomhin. Herald added a subscriber: jholewinski.
The backend should only emit data sharing code for the cases where it is needed. A new function attribute is used by Clang to enable data sharing only for the cases where OpenMP semantics require it and there are variables that need to be shared. Repository: rL LLVM https://reviews.llvm.org/D41123 Files: lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp test/OpenMP/nvptx_data_sharing.cpp Index: test/OpenMP/nvptx_data_sharing.cpp =================================================================== --- test/OpenMP/nvptx_data_sharing.cpp +++ test/OpenMP/nvptx_data_sharing.cpp @@ -22,15 +22,15 @@ /// ========= In the worker function ========= /// -// CK1: define internal void @__omp_offloading_{{.*}}test_ds{{.*}}worker(){{.*}}{ +// CK1: define internal void @__omp_offloading_{{.*}}test_ds{{.*}}worker() #0 // CK1: [[SHAREDARGS:%.+]] = alloca i8** // CK1: call i1 @__kmpc_kernel_parallel(i8** %work_fn, i8*** [[SHAREDARGS]]) // CK1: [[SHARGSTMP:%.+]] = load i8**, i8*** [[SHAREDARGS]] // CK1: call void @__omp_outlined___wrapper{{.*}}({{.*}}, i8** [[SHARGSTMP]]) /// ========= In the kernel function ========= /// -// CK1: {{.*}}define void @__omp_offloading{{.*}}test_ds{{.*}}() +// CK1: {{.*}}define void @__omp_offloading{{.*}}test_ds{{.*}}() #1 // CK1: [[SHAREDARGS1:%.+]] = alloca i8** // CK1: call void @__kmpc_kernel_prepare_parallel({{.*}}, i8*** [[SHAREDARGS1]], i32 1) // CK1: [[SHARGSTMP1:%.+]] = load i8**, i8*** [[SHAREDARGS1]] @@ -40,13 +40,18 @@ /// ========= In the data sharing wrapper function ========= /// -// CK1: {{.*}}define internal void @__omp_outlined___wrapper({{.*}}i8**){{.*}}{ +// CK1: {{.*}}define internal void @__omp_outlined___wrapper({{.*}}i8**) #0 // CK1: [[SHAREDARGS2:%.+]] = alloca i8** // CK1: store i8** %2, i8*** [[SHAREDARGS2]] // CK1: [[SHARGSTMP3:%.+]] = load i8**, i8*** [[SHAREDARGS2]] // CK1: [[SHARGSTMP4:%.+]] = getelementptr inbounds i8*, i8** [[SHARGSTMP3]] // CK1: [[SHARGSTMP5:%.+]] = bitcast i8** [[SHARGSTMP4]] to i32** // CK1: [[SHARGSTMP6:%.+]] = 
load i32*, i32** [[SHARGSTMP5]] // CK1: call void @__omp_outlined__({{.*}}, i32* [[SHARGSTMP6]]) +/// ========= Attributes ========= /// + +// CK1-NOT: attributes #0 = { {{.*}}"has-nvptx-shared-depot"{{.*}} } +// CK1: attributes #1 = { {{.*}}"has-nvptx-shared-depot"{{.*}} } + #endif Index: lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp =================================================================== --- lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -942,6 +942,8 @@ llvm::Value *ID = Bld.CreateBitOrPointerCast(WFn, CGM.Int8PtrTy); if (!CapturedVars.empty()) { + // There's something to share, add the attribute + CGF.CurFn->addFnAttr("has-nvptx-shared-depot"); // Prepare for parallel region. Indicate the outlined function. Address SharedArgs = CGF.CreateDefaultAlignTempAlloca(CGF.VoidPtrPtrTy,
Index: test/OpenMP/nvptx_data_sharing.cpp =================================================================== --- test/OpenMP/nvptx_data_sharing.cpp +++ test/OpenMP/nvptx_data_sharing.cpp @@ -22,15 +22,15 @@ /// ========= In the worker function ========= /// -// CK1: define internal void @__omp_offloading_{{.*}}test_ds{{.*}}worker(){{.*}}{ +// CK1: define internal void @__omp_offloading_{{.*}}test_ds{{.*}}worker() #0 // CK1: [[SHAREDARGS:%.+]] = alloca i8** // CK1: call i1 @__kmpc_kernel_parallel(i8** %work_fn, i8*** [[SHAREDARGS]]) // CK1: [[SHARGSTMP:%.+]] = load i8**, i8*** [[SHAREDARGS]] // CK1: call void @__omp_outlined___wrapper{{.*}}({{.*}}, i8** [[SHARGSTMP]]) /// ========= In the kernel function ========= /// -// CK1: {{.*}}define void @__omp_offloading{{.*}}test_ds{{.*}}() +// CK1: {{.*}}define void @__omp_offloading{{.*}}test_ds{{.*}}() #1 // CK1: [[SHAREDARGS1:%.+]] = alloca i8** // CK1: call void @__kmpc_kernel_prepare_parallel({{.*}}, i8*** [[SHAREDARGS1]], i32 1) // CK1: [[SHARGSTMP1:%.+]] = load i8**, i8*** [[SHAREDARGS1]] @@ -40,13 +40,18 @@ /// ========= In the data sharing wrapper function ========= /// -// CK1: {{.*}}define internal void @__omp_outlined___wrapper({{.*}}i8**){{.*}}{ +// CK1: {{.*}}define internal void @__omp_outlined___wrapper({{.*}}i8**) #0 // CK1: [[SHAREDARGS2:%.+]] = alloca i8** // CK1: store i8** %2, i8*** [[SHAREDARGS2]] // CK1: [[SHARGSTMP3:%.+]] = load i8**, i8*** [[SHAREDARGS2]] // CK1: [[SHARGSTMP4:%.+]] = getelementptr inbounds i8*, i8** [[SHARGSTMP3]] // CK1: [[SHARGSTMP5:%.+]] = bitcast i8** [[SHARGSTMP4]] to i32** // CK1: [[SHARGSTMP6:%.+]] = load i32*, i32** [[SHARGSTMP5]] // CK1: call void @__omp_outlined__({{.*}}, i32* [[SHARGSTMP6]]) +/// ========= Attributes ========= /// + +// CK1-NOT: attributes #0 = { {{.*}}"has-nvptx-shared-depot"{{.*}} } +// CK1: attributes #1 = { {{.*}}"has-nvptx-shared-depot"{{.*}} } + #endif Index: lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp 
=================================================================== --- lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -942,6 +942,8 @@ llvm::Value *ID = Bld.CreateBitOrPointerCast(WFn, CGM.Int8PtrTy); if (!CapturedVars.empty()) { + // There's something to share, add the attribute + CGF.CurFn->addFnAttr("has-nvptx-shared-depot"); // Prepare for parallel region. Indicate the outlined function. Address SharedArgs = CGF.CreateDefaultAlignTempAlloca(CGF.VoidPtrPtrTy,
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits