On Tue, Jun 30, 2015 at 18:10:44 +0200, Jakub Jelinek wrote: > The thing is whether it is actually a good idea to allocate the enter data > allocated objects together. > In OpenMP 4.0, generally objects would be allocated and deallocated at the > same times, except for multiple host threads trying to map the same variables > into the target. In OpenMP 4.1, due to enter data/exit data, they can be > allocated and freed quite independently, and it is true that is the case > even for target data, one can either target data, then target enter data > to prevent something from being deallocated, then target data end freeing > only parts, etc. So the question is if we think in real-world the > allocation or deallocation will be usually together or not.
IMHO, it's OK to allocate "target data" objects together and "target enter data" objects one by one. I've implemented this approach in the patch bellow. However, if someone writes a program like this: #pragma omp target data map(tofrom: small, arr[:big]) { #pragma omp target enter data map(to: small) } do_a_lot_of_something (); #pragma omp target exit data map(from: small) Big array will be deallocated on target only with 'small' at the end. Is this acceptable? The patch is not ready though, I don't know how to unmap GOMP_MAP_POINTER vars. In gomp_unmap_vars they're unmapped through tgt->list[], but in gomp_exit_data it's impossible to find such var in the splay tree, because hostaddr differs from the address, used at mapping. libgomp/ * target.c (gomp_map_vars_existing): Fix target address for 'always to' array sections. Handle special refcount UINTPTR_MAX. (gomp_map_vars): Handle special refcount UINTPTR_MAX. (gomp_unmap_vars): Decrement k->refcount when it's 1 and k->async_refcount is 0. (gomp_offload_image_to_device): Set refcounts to UINTPTR_MAX. (gomp_exit_data): New static function. (GOMP_target_enter_exit_data): Add mapping/unmapping. * testsuite/libgomp.c/target-11.c: Extend for testing 'always to' array sections. * testsuite/libgomp.c/target-12.c: New test. diff --git a/libgomp/target.c b/libgomp/target.c index a394e95..20e32f8 100644 --- a/libgomp/target.c +++ b/libgomp/target.c @@ -171,10 +171,13 @@ gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn, if (GOMP_MAP_ALWAYS_TO_P (kind)) devicep->host2dev_func (devicep->target_id, - (void *) (oldn->tgt->tgt_start + oldn->tgt_offset), + (void *) (oldn->tgt->tgt_start + oldn->tgt_offset + + newn->host_start - oldn->host_start), (void *) newn->host_start, newn->host_end - newn->host_start); - oldn->refcount++; + + if (oldn->refcount != UINTPTR_MAX) + oldn->refcount++; } static int @@ -439,7 +442,8 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, tgt->list[j].key = k; tgt->list[j].copy_from = false; tgt->list[j].always_copy_from = false; - k->refcount++; + if (k->refcount != UINTPTR_MAX) + k->refcount++; gomp_map_pointer (tgt, (uintptr_t) *(void **) hostaddrs[j], k->tgt_offset @@ -578,12 +582,18 @@ gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom) continue; bool do_unmap = false; - if (k->refcount > 1) + if (k->refcount > 1 && k->refcount != UINTPTR_MAX) k->refcount--; - else if (k->async_refcount > 0) - k->async_refcount--; - else - do_unmap = true; + else if (k->refcount == 1) + { + if (k->async_refcount > 0) + k->async_refcount--; + else + { + k->refcount--; + do_unmap = true; + } + } if ((do_unmap && do_copyfrom && tgt->list[i].copy_from) || tgt->list[i].always_copy_from) @@ -709,7 +719,7 @@ gomp_offload_image_to_device (struct gomp_device_descr *devicep, /* Insert host-target address mapping into splay tree. */ struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt)); tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array)); - tgt->refcount = 1; + tgt->refcount = UINTPTR_MAX; tgt->tgt_start = 0; tgt->tgt_end = 0; tgt->to_free = NULL; @@ -725,7 +735,7 @@ gomp_offload_image_to_device (struct gomp_device_descr *devicep, k->host_end = k->host_start + 1; k->tgt = tgt; k->tgt_offset = target_table[i].start; - k->refcount = 1; + k->refcount = UINTPTR_MAX; k->async_refcount = 0; array->left = NULL; array->right = NULL; @@ -750,7 +760,7 @@ gomp_offload_image_to_device (struct gomp_device_descr *devicep, k->host_end = k->host_start + (uintptr_t) host_var_table[i * 2 + 1]; k->tgt = tgt; k->tgt_offset = target_var->start; - k->refcount = 1; + k->refcount = UINTPTR_MAX; k->async_refcount = 0; array->left = NULL; array->right = NULL; @@ -1121,6 +1131,63 @@ GOMP_target_update (int device, const void *unused, size_t mapnum, gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false); } +static void +gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum, + void **hostaddrs, size_t *sizes, unsigned short *kinds) +{ + const int typemask = 0xff; + size_t i; + gomp_mutex_lock (&devicep->lock); + for (i = 0; i < mapnum; i++) + { + struct splay_tree_key_s cur_node; + unsigned char kind = kinds[i] & typemask; + switch (kind) + { + case GOMP_MAP_FROM: + case GOMP_MAP_ALWAYS_FROM: + case GOMP_MAP_DELETE: + case GOMP_MAP_RELEASE: + case GOMP_MAP_POINTER: + cur_node.host_start = (uintptr_t) hostaddrs[i]; + cur_node.host_end = cur_node.host_start + sizes[i]; + splay_tree_key k = splay_tree_lookup (&devicep->mem_map, &cur_node); + if (!k) + continue; + + if (k->refcount > 0 && k->refcount != UINTPTR_MAX) + k->refcount--; + if (kind == GOMP_MAP_DELETE && k->refcount != UINTPTR_MAX) + k->refcount = 0; + + if ((kind == GOMP_MAP_FROM && k->refcount == 0) + || kind == GOMP_MAP_ALWAYS_FROM) + devicep->dev2host_func (devicep->target_id, + (void *) cur_node.host_start, + (void *) (k->tgt->tgt_start + k->tgt_offset + + cur_node.host_start + - k->host_start), + cur_node.host_end - cur_node.host_start); + if (k->refcount == 0) + { + splay_tree_remove (&devicep->mem_map, k); + if (k->tgt->refcount > 1) + k->tgt->refcount--; + else + gomp_unmap_tgt (k->tgt); + } + + break; + default: + gomp_mutex_unlock (&devicep->lock); + gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x", + kind); + } + } + + gomp_mutex_unlock (&devicep->lock); +} + void GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs, size_t *sizes, unsigned short *kinds) @@ -1160,13 +1227,20 @@ GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs, } if (is_enter_data) - { - /* TODO */ - } + for (i = 0; i < mapnum; i++) + { + struct target_mem_desc *tgt_var + = gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], + &kinds[i], true, false); + tgt_var->refcount--; + + /* If the variable was already mapped, tgt_var is not needed. Otherwise + tgt_var will be freed by gomp_unmap_vars or gomp_exit_data. */ + if (tgt_var->refcount == 0) + free (tgt_var); + } else - { - /* TODO */ - } + gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds); } void diff --git a/libgomp/testsuite/libgomp.c/target-11.c b/libgomp/testsuite/libgomp.c/target-11.c index b86097a..98882f0 100644 --- a/libgomp/testsuite/libgomp.c/target-11.c +++ b/libgomp/testsuite/libgomp.c/target-11.c @@ -9,6 +9,17 @@ void test_array_section (int *p) { #pragma omp target data map(alloc: p[0:N]) { + int ok = 1; + for (int i = 10; i < 10 + 4; i++) + p[i] = 997 * i; + + #pragma omp target map(always to:p[10:4]) map(tofrom: ok) + for (int i = 10; i < 10 + 4; i++) + if (p[i] != 997 * i) + ok = 0; + + assert (ok); + #pragma omp target map(always from:p[7:9]) for (int i = 0; i < N; i++) p[i] = i; diff --git a/libgomp/testsuite/libgomp.c/target-12.c b/libgomp/testsuite/libgomp.c/target-12.c new file mode 100644 index 0000000..abc6c0a --- /dev/null +++ b/libgomp/testsuite/libgomp.c/target-12.c @@ -0,0 +1,110 @@ +/* { dg-require-effective-target offload_device } */ + +#include <stdlib.h> +#include <assert.h> + +#define N 40 + +int sum; +int var1 = 1; +int var2 = 2; + +#pragma omp declare target +int D[N]; +#pragma omp end declare target + +void enter_data (int *X) +{ + #pragma omp target enter data map(to: var1, var2, X[:N]) map(alloc: sum) +} + +void exit_data_0 (int *D) +{ + #pragma omp target exit data map(delete: D[:N]) +} + +void exit_data_1 () +{ + #pragma omp target exit data map(from: var1) +} + +void exit_data_2 (int *X) +{ + #pragma omp target exit data map(from: var2) map(release: X[:N], sum) +} + +void test_nested () +{ + int X = 0, Y = 0, Z = 0; + + #pragma omp target data map(from: X, Y, Z) + { + #pragma omp target data map(from: X, Y, Z) + { + #pragma omp target map(from: X, Y, Z) + X = Y = Z = 1337; + assert (X == 0); + assert (Y == 0); + assert (Z == 0); + + #pragma omp target exit data map(from: X) map(release: Y) + assert (X == 0); + assert (Y == 0); + + #pragma omp target exit data map(release: Y) map(delete: Z) + assert (Y == 0); + assert (Z == 0); + } + assert (X == 1337); + assert (Y == 0); + assert (Z == 0); + + #pragma omp target map(from: X) + X = 2448; + assert (X == 2448); + assert (Y == 0); + assert (Z == 0); + + X = 4896; + } + assert (X == 4896); + assert (Y == 0); + assert (Z == 0); +} + +int main () +{ + int *X = malloc (N * sizeof (int)); + int *Y = malloc (N * sizeof (int)); + X[10] = 10; + Y[20] = 20; + enter_data (X); + + exit_data_0 (D); /* This should have no effect on D. */ + + #pragma omp target map(alloc: X[:N]) map(to: Y[:N]) map(always from: sum) + { + var1 += X[10]; + var2 += Y[20]; + sum = var1 + var2; + D[sum]++; + } + + assert (var1 == 1); + assert (var2 == 2); + assert (sum == 33); + + exit_data_1 (); + assert (var1 == 11); + assert (var2 == 2); + + exit_data_2 (X); + assert (var2 == 22); + + free (X); + free (Y); + + test_nested (); + + return 0; +} -- Ilya P.S. Also I found an ICE: #pragma omp declare target int arr[10]; #pragma omp end declare target void foo (int x) { #pragma omp target map(always from: arr[0:10], x) arr[0]; } $ gcc -fopenmp -c test.c test.c: In function ‘foo’: test.c:7:11: internal compiler error: Segmentation fault #pragma omp target map(always from: arr[0:10], x) ^ 0xdc6562 crash_signal gcc/toplev.c:366 0xc574f4 lookup_sfield gcc/omp-low.c:1080 0xc5830d build_sender_ref gcc/omp-low.c:1364 0xc88be4 lower_omp_target gcc/omp-low.c:12898