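When the program requires unified shared memory ("#pragma omp requires unified_shared_memory"), allocations from omp_default_mem_space should also be served from unified shared memory, just like those from ompx_gnu_unified_shared_mem_space.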
libgomp/ChangeLog: * config/linux/allocator.c (linux_memspace_alloc): Check omp_requires_mask. (linux_memspace_calloc): Likewise. (linux_memspace_free): Likewise. (linux_memspace_realloc): Likewise. * libgomp.h (omp_requires_mask): New extern. * target.c (omp_requires_mask): Remove static. * testsuite/libgomp.c-c++-common/target-implicit-map-4.c: Add NO_USM_STACK conditional code. --- libgomp/config/linux/allocator.c | 16 ++++++++++++---- libgomp/libgomp.h | 1 + libgomp/target.c | 2 +- .../libgomp.c-c++-common/target-implicit-map-4.c | 16 ++++++++++++++++ 4 files changed, 30 insertions(+), 5 deletions(-) diff --git a/libgomp/config/linux/allocator.c b/libgomp/config/linux/allocator.c index 81d2877b8f1..a026f49be16 100644 --- a/libgomp/config/linux/allocator.c +++ b/libgomp/config/linux/allocator.c @@ -101,7 +101,9 @@ linux_memspace_alloc (omp_memspace_handle_t memspace, size_t size, int pin, /* Explicit pinning may not be required. */ pin = pin && !always_pinned_mode; - if (memspace == ompx_gnu_unified_shared_mem_space) + if (memspace == ompx_gnu_unified_shared_mem_space + || (memspace == omp_default_mem_space + && (omp_requires_mask & GOMP_REQUIRES_UNIFIED_SHARED_MEMORY))) addr = gomp_usm_alloc (size); else if (pin) { @@ -194,7 +196,9 @@ linux_memspace_calloc (omp_memspace_handle_t memspace, size_t size, int pin) /* Explicit pinning may not be required. */ pin = pin && !always_pinned_mode; - if (memspace == ompx_gnu_unified_shared_mem_space) + if (memspace == ompx_gnu_unified_shared_mem_space + || (memspace == omp_default_mem_space + && (omp_requires_mask & GOMP_REQUIRES_UNIFIED_SHARED_MEMORY))) { void *ret = gomp_usm_alloc (size); memset (ret, 0, size); @@ -216,7 +220,9 @@ linux_memspace_free (omp_memspace_handle_t memspace, void *addr, size_t size, /* Explicit pinning may not be required. 
*/ pin = pin && !always_pinned_mode; - if (memspace == ompx_gnu_unified_shared_mem_space) + if (memspace == ompx_gnu_unified_shared_mem_space + || (memspace == omp_default_mem_space + && (omp_requires_mask & GOMP_REQUIRES_UNIFIED_SHARED_MEMORY))) gomp_usm_free (addr); else if (pin) { @@ -244,7 +250,9 @@ linux_memspace_realloc (omp_memspace_handle_t memspace, void *addr, /* Explicit pinning may not be required. */ pin = pin && !always_pinned_mode; - if (memspace == ompx_gnu_unified_shared_mem_space) + if (memspace == ompx_gnu_unified_shared_mem_space + || (memspace == omp_default_mem_space + && (omp_requires_mask & GOMP_REQUIRES_UNIFIED_SHARED_MEMORY))) /* Realloc is not implemented for USM. */ ; else if (oldpin && pin) diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h index 707fcdb39d7..4c5c89c8454 100644 --- a/libgomp/libgomp.h +++ b/libgomp/libgomp.h @@ -1123,6 +1123,7 @@ extern int gomp_pause_host (void); /* target.c */ +extern int omp_requires_mask; extern void gomp_init_targets_once (void); extern int gomp_get_num_devices (void); extern bool gomp_target_task_fn (void *); diff --git a/libgomp/target.c b/libgomp/target.c index f0ee2c84197..455cac917c9 100644 --- a/libgomp/target.c +++ b/libgomp/target.c @@ -107,7 +107,7 @@ static int num_devices; static int num_devices_openmp; /* OpenMP requires mask. */ -static int omp_requires_mask; +int omp_requires_mask; /* Similar to gomp_realloc, but release register_lock before gomp_fatal. */ diff --git a/libgomp/testsuite/libgomp.c-c++-common/target-implicit-map-4.c b/libgomp/testsuite/libgomp.c-c++-common/target-implicit-map-4.c index 2766312292b..de865352e9b 100644 --- a/libgomp/testsuite/libgomp.c-c++-common/target-implicit-map-4.c +++ b/libgomp/testsuite/libgomp.c-c++-common/target-implicit-map-4.c @@ -6,6 +6,9 @@ /* { dg-skip-if "Not all devices allow USM" { offload_device_gcn && { ! 
omp_usm } } } */ +/* { dg-additional-options "-DNO_USM_STACK" { target offload_target_nvptx } } */ +/* { dg-additional-options "-DNO_USM_STACK" { target offload_target_amdgcn } } */ + #pragma omp requires unified_shared_memory /* Ensure that defaultmap(default : pointer) uses correct OpenMP 5.2 @@ -27,10 +30,23 @@ test_device (int dev) intptr_t ip = (intptr_t) p2; intptr_t ipa = (intptr_t) p2a; +#if NO_USM_STACK + int A_init[3] = {1,2,3}; + int B_init[5] = {4,5,6,7,8}; + int *A = (int*) malloc (sizeof (A_init)); + int *B = (int*) malloc (sizeof (B_init)); + int *p3 = &A[0]; + int *p3a = &B[0]; + + /* Not all USM supports stack variables. */ + __builtin_memcpy (A, A_init, sizeof (A_init)); + __builtin_memcpy (B, B_init, sizeof (B_init)); +#else int A[3] = {1,2,3}; int B[5] = {4,5,6,7,8}; int *p3 = &A[0]; int *p3a = &B[0]; +#endif const omp_alloctrait_t traits[] = { { omp_atk_alignment, 128 }, -- 2.41.0