https://github.com/jhuber6 updated https://github.com/llvm/llvm-project/pull/104790
>From eaa00ef74500833f280405c824d0282862c87b11 Mon Sep 17 00:00:00 2001 From: Joseph Huber <hube...@outlook.com> Date: Mon, 19 Aug 2024 09:44:37 -0500 Subject: [PATCH 1/2] [OpenMP] Map `omp_default_mem_alloc` to global memory Summary: Currently, we assign this to private memory. This causes failures on some SOLLVE tests. The standard isn't clear on the semantics of this allocation type, but there seems to be a consensus that it's supposed to be shared memory. --- clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp | 16 +++++++++------- clang/test/OpenMP/nvptx_allocate_codegen.cpp | 10 ++++------ offload/test/api/omp_device_alloc.c | 16 +++++++++++----- 3 files changed, 24 insertions(+), 18 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp index 8965a14d88a6fb..77038b0f8ddc7b 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -2048,15 +2048,15 @@ Address CGOpenMPRuntimeGPU::getAddressOfLocalVariable(CodeGenFunction &CGF, const auto *A = VD->getAttr<OMPAllocateDeclAttr>(); auto AS = LangAS::Default; switch (A->getAllocatorType()) { - // Use the default allocator here as by default local vars are - // threadlocal. case OMPAllocateDeclAttr::OMPNullMemAlloc: case OMPAllocateDeclAttr::OMPDefaultMemAlloc: - case OMPAllocateDeclAttr::OMPThreadMemAlloc: case OMPAllocateDeclAttr::OMPHighBWMemAlloc: case OMPAllocateDeclAttr::OMPLowLatMemAlloc: - // Follow the user decision - use default allocation. - return Address::invalid(); + AS = LangAS::opencl_global; + break; + case OMPAllocateDeclAttr::OMPThreadMemAlloc: + AS = LangAS::opencl_private; + break; case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc: // TODO: implement aupport for user-defined allocators. return Address::invalid(); @@ -2208,12 +2208,14 @@ bool CGOpenMPRuntimeGPU::hasAllocateAttributeForGlobalVar(const VarDecl *VD, case OMPAllocateDeclAttr::OMPNullMemAlloc: case OMPAllocateDeclAttr::OMPDefaultMemAlloc: // Not supported, fallback to the default mem space. - case OMPAllocateDeclAttr::OMPThreadMemAlloc: case OMPAllocateDeclAttr::OMPLargeCapMemAlloc: case OMPAllocateDeclAttr::OMPCGroupMemAlloc: case OMPAllocateDeclAttr::OMPHighBWMemAlloc: case OMPAllocateDeclAttr::OMPLowLatMemAlloc: - AS = LangAS::Default; + AS = LangAS::opencl_global; + return true; + case OMPAllocateDeclAttr::OMPThreadMemAlloc: + AS = LangAS::opencl_private; return true; case OMPAllocateDeclAttr::OMPConstMemAlloc: AS = LangAS::cuda_constant; diff --git a/clang/test/OpenMP/nvptx_allocate_codegen.cpp b/clang/test/OpenMP/nvptx_allocate_codegen.cpp index 3f3457dab33c2d..f4bd2458c3d17d 100644 --- a/clang/test/OpenMP/nvptx_allocate_codegen.cpp +++ b/clang/test/OpenMP/nvptx_allocate_codegen.cpp @@ -87,10 +87,9 @@ void bar() { // CHECK1-SAME: () #[[ATTR0:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[B:%.*]] = alloca double, align 8 // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: store i32 2, ptr @_ZZ4mainE1a, align 4 -// CHECK1-NEXT: store double 3.000000e+00, ptr [[B]], align 8 +// CHECK1-NEXT: store double 3.000000e+00, ptr addrspacecast (ptr addrspace(1) @b1 to ptr), align 8 // CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooIiET_v() #[[ATTR7:[0-9]+]] // CHECK1-NEXT: ret i32 [[CALL]] // @@ -98,7 +97,7 @@ void bar() { // CHECK1-LABEL: define {{[^@]+}}@_Z3fooIiET_v // CHECK1-SAME: () #[[ATTR1:[0-9]+]] comdat { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr @_ZN2STIiE1mE, align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr addrspacecast (ptr addrspace(1) @_ZN2STIiE1mE to ptr), align 4 // CHECK1-NEXT: store i32 [[TMP0]], ptr @v, align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr @v, align 4 // CHECK1-NEXT: ret i32 [[TMP1]] @@ -120,13 +119,12 @@ void bar() { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[BAR_A:%.*]] = alloca float, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load float, ptr [[BAR_A]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load float, ptr @bar_a, align 4 // CHECK1-NEXT: [[CONV:%.*]] = fpext float [[TMP0]] to double // CHECK1-NEXT: store double [[CONV]], ptr addrspacecast (ptr addrspace(3) @bar_b to ptr), align 8 -// CHECK1-NEXT: call void @_Z3bazRf(ptr noundef nonnull align 4 dereferenceable(4) [[BAR_A]]) #[[ATTR7]] +// CHECK1-NEXT: call void @_Z3bazRf(ptr noundef nonnull align 4 dereferenceable(4) @bar_a) #[[ATTR7]] // CHECK1-NEXT: ret void // // diff --git a/offload/test/api/omp_device_alloc.c b/offload/test/api/omp_device_alloc.c index 368c6cfe42949b..b4cfe442d9ded5 100644 --- a/offload/test/api/omp_device_alloc.c +++ b/offload/test/api/omp_device_alloc.c @@ -5,13 +5,19 @@ #include <stdio.h> int main() { -#pragma omp target teams num_teams(4) -#pragma omp parallel +#pragma omp target { - int *ptr = (int *)omp_alloc(sizeof(int), omp_default_mem_alloc); + int *ptr; +#pragma omp allocate(ptr) allocator(omp_default_mem_alloc) + ptr = omp_alloc(sizeof(int), omp_default_mem_alloc); assert(ptr && "Ptr is (null)!"); - *ptr = 1; - assert(*ptr == 1 && "Ptr is not 1"); + *ptr = 0; +#pragma omp parallel num_threads(32) + { +#pragma omp atomic + *ptr += 1; + } + assert(*ptr == 32 && "Ptr is not 32"); omp_free(ptr, omp_default_mem_alloc); } >From fa205004d6bccb9fa6cf4fb846d27a429963650f Mon Sep 17 00:00:00 2001 From: Joseph Huber <hube...@outlook.com> Date: Mon, 19 Aug 2024 12:53:48 -0500 Subject: [PATCH 2/2] Update --- clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp index 77038b0f8ddc7b..9e095a37552196 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -2052,11 +2052,9 @@ Address CGOpenMPRuntimeGPU::getAddressOfLocalVariable(CodeGenFunction &CGF, case OMPAllocateDeclAttr::OMPDefaultMemAlloc: case OMPAllocateDeclAttr::OMPHighBWMemAlloc: case OMPAllocateDeclAttr::OMPLowLatMemAlloc: - AS = LangAS::opencl_global; break; case OMPAllocateDeclAttr::OMPThreadMemAlloc: - AS = LangAS::opencl_private; - break; + return Address::invalid(); case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc: // TODO: implement aupport for user-defined allocators. return Address::invalid(); @@ -2212,10 +2210,8 @@ bool CGOpenMPRuntimeGPU::hasAllocateAttributeForGlobalVar(const VarDecl *VD, case OMPAllocateDeclAttr::OMPCGroupMemAlloc: case OMPAllocateDeclAttr::OMPHighBWMemAlloc: case OMPAllocateDeclAttr::OMPLowLatMemAlloc: - AS = LangAS::opencl_global; - return true; case OMPAllocateDeclAttr::OMPThreadMemAlloc: - AS = LangAS::opencl_private; + AS = LangAS::Default; return true; case OMPAllocateDeclAttr::OMPConstMemAlloc: AS = LangAS::cuda_constant; _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits