This patch implements the OpenACC 2.5 data clause semantics in libgomp. Is it OK for trunk?
Cesar
2018-06-19 Chung-Lin Tang <clt...@codesourcery.com> Thomas Schwinge <tho...@codesourcery.com> Cesar Philippidis <ce...@codesourcery.com> libgomp/ * libgomp.h (struct splay_tree_key_s): Add dynamic_refcount member. (gomp_acc_remove_pointer): Update declaration. (gomp_acc_declare_allocate): Declare. (gomp_remove_var): Declare. * libgomp.map (OACC_2.5): Define. * oacc-mem.c (acc_map_data): Update refcount. (acc_unmap_data): Likewise. (present_create_copy): Likewise. (acc_create): Add FLAG_PRESENT when calling present_create_copy. (acc_copyin): Likewise. (FLAG_FINALIZE): Define. (delete_copyout): Update dynamic refcounts, add support for FINALIZE. (acc_delete_finalize): New function. (acc_delete_finalize_async): New function. (acc_copyout_finalize): New function. (acc_copyout_finalize_async): New function. (gomp_acc_insert_pointer): Update refcounts. (gomp_acc_remove_pointer): Return if data is not present on the accelerator. * oacc-parallel.c (find_pset): Rename to find_pointer. (find_pointer): Add support for GOMP_MAP_POINTER. (handle_ftn_pointers): New function. (GOACC_parallel_keyed): Update refcounts of variables. (GOACC_enter_exit_data): Add support for finalized data mappings. Add support for GOMP_MAP_{TO,ALLOC,RELEASE,FROM}. Update handling of Fortran arrays. (GOACC_update): Add support for GOMP_MAP_{ALWAYS_POINTER,TO,FROM}. (GOACC_declare): Add support for GOMP_MAP_RELEASE, remove support for GOMP_MAP_FORCE_FROM. * openacc.f90 (module openacc_internal): Add acc_copyout_finalize_{32_h,64_h,array_h,_l}, and acc_delete_finalize_{32_h,64_h,array_h,_l}. Add interfaces for acc_copyout_finalize and acc_delete_finalize. (acc_copyout_finalize_32_h): New subroutine. (acc_copyout_finalize_64_h): New subroutine. (acc_copyout_finalize_array_h): New subroutine. (acc_delete_finalize_32_h): New subroutine. (acc_delete_finalize_64_h): New subroutine. (acc_delete_finalize_array_h): New subroutine. * openacc.h (acc_copyout_finalize): Declare. 
(acc_copyout_finalize_async): Declare. (acc_delete_finalize): Declare. (acc_delete_finalize_async): Declare. * openacc_lib.h (acc_copyout_finalize): New interface. (acc_delete_finalize): New interface. * target.c (gomp_map_vars): Update dynamic_refcount. (gomp_remove_var): New function. (gomp_unmap_vars): Use it. (gomp_unload_image_from_device): Likewise. >From 53ee03231c5e6e4747b4ef01335079a2d4a98480 Mon Sep 17 00:00:00 2001 From: Cesar Philippidis <ce...@codesourcery.com> Date: Tue, 19 Jun 2018 09:33:04 -0700 Subject: [PATCH 7/7] runtime changes --- libgomp/libgomp.h | 7 +- libgomp/libgomp.map | 12 +++ libgomp/oacc-mem.c | 196 ++++++++++++++++++++++++++++++++------- libgomp/oacc-parallel.c | 198 ++++++++++++++++++++++++++++++++++------ libgomp/openacc.f90 | 112 +++++++++++++++++++++++ libgomp/openacc.h | 6 ++ libgomp/openacc_lib.h | 40 ++++++++ libgomp/target.c | 41 ++++----- 8 files changed, 528 insertions(+), 84 deletions(-) diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h index 10ea8940c96..3a8cc2bd7d6 100644 --- a/libgomp/libgomp.h +++ b/libgomp/libgomp.h @@ -853,6 +853,8 @@ struct splay_tree_key_s { uintptr_t tgt_offset; /* Reference count. */ uintptr_t refcount; + /* Dynamic reference count. */ + uintptr_t dynamic_refcount; /* Pointer to the original mapping of "omp declare target link" object. 
*/ splay_tree_key link_key; }; @@ -991,7 +993,9 @@ enum gomp_map_vars_kind }; extern void gomp_acc_insert_pointer (size_t, void **, size_t *, void *); -extern void gomp_acc_remove_pointer (void *, bool, int, int); +extern void gomp_acc_remove_pointer (void *, size_t, bool, int, int, int); +extern void gomp_acc_declare_allocate (bool, size_t, void **, size_t *, + unsigned short *); extern struct target_mem_desc *gomp_map_vars (struct gomp_device_descr *, size_t, void **, void **, @@ -1001,6 +1005,7 @@ extern void gomp_unmap_vars (struct target_mem_desc *, bool); extern void gomp_init_device (struct gomp_device_descr *); extern void gomp_free_memmap (struct splay_tree_s *); extern void gomp_unload_device (struct gomp_device_descr *); +extern bool gomp_remove_var (struct gomp_device_descr *, splay_tree_key); /* work.c */ diff --git a/libgomp/libgomp.map b/libgomp/libgomp.map index 8752348fbf2..2cd3bf524bc 100644 --- a/libgomp/libgomp.map +++ b/libgomp/libgomp.map @@ -386,6 +386,18 @@ OACC_2.0.1 { acc_pcreate; } OACC_2.0; +OACC_2.5 { + global: + acc_copyout_finalize; + acc_copyout_finalize_32_h_; + acc_copyout_finalize_64_h_; + acc_copyout_finalize_array_h_; + acc_delete_finalize; + acc_delete_finalize_32_h_; + acc_delete_finalize_64_h_; + acc_delete_finalize_array_h_; +} OACC_2.0.1; + GOACC_2.0 { global: GOACC_data_end; diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c index 158f0862018..3787ce49e38 100644 --- a/libgomp/oacc-mem.c +++ b/libgomp/oacc-mem.c @@ -347,6 +347,7 @@ acc_map_data (void *h, void *d, size_t s) tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, &devaddrs, &sizes, &kinds, true, GOMP_MAP_VARS_OPENACC); + tgt->list[0].key->refcount = REFCOUNT_INFINITY; } gomp_mutex_lock (&acc_dev->lock); @@ -389,6 +390,9 @@ acc_unmap_data (void *h) (void *) n->host_start, (int) host_size, (void *) h); } + /* Mark for removal. 
*/ + n->refcount = 1; + t = n->tgt; if (t->refcount == 2) @@ -460,6 +464,11 @@ present_create_copy (unsigned f, void *h, size_t s) gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s); } + if (n->refcount != REFCOUNT_INFINITY) + { + n->refcount++; + n->dynamic_refcount++; + } gomp_mutex_unlock (&acc_dev->lock); } else if (!(f & FLAG_CREATE)) @@ -483,6 +492,8 @@ present_create_copy (unsigned f, void *h, size_t s) tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, NULL, &s, &kinds, true, GOMP_MAP_VARS_OPENACC); + /* Initialize dynamic refcount. */ + tgt->list[0].key->dynamic_refcount = 1; gomp_mutex_lock (&acc_dev->lock); @@ -499,13 +510,13 @@ present_create_copy (unsigned f, void *h, size_t s) void * acc_create (void *h, size_t s) { - return present_create_copy (FLAG_CREATE, h, s); + return present_create_copy (FLAG_PRESENT | FLAG_CREATE, h, s); } void * acc_copyin (void *h, size_t s) { - return present_create_copy (FLAG_CREATE | FLAG_COPY, h, s); + return present_create_copy (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY, h, s); } void * @@ -542,7 +553,8 @@ acc_pcopyin (void *h, size_t s) } #endif -#define FLAG_COPYOUT (1 << 0) +#define FLAG_COPYOUT (1 << 0) +#define FLAG_FINALIZE (1 << 1) static void delete_copyout (unsigned f, void *h, size_t s, const char *libfnname) @@ -581,15 +593,52 @@ delete_copyout (unsigned f, void *h, size_t s, const char *libfnname) (void *) n->host_start, (int) host_size, (void *) h, (int) s); } - gomp_mutex_unlock (&acc_dev->lock); + if (n->refcount == REFCOUNT_INFINITY) + { + n->refcount = 0; + n->dynamic_refcount = 0; + } + if (n->refcount < n->dynamic_refcount) + { + gomp_mutex_unlock (&acc_dev->lock); + gomp_fatal ("Dynamic reference counting assert fail\n"); + } - if (f & FLAG_COPYOUT) - acc_dev->dev2host_func (acc_dev->target_id, h, d, s); + if (f & FLAG_FINALIZE) + { + n->refcount -= n->dynamic_refcount; + n->dynamic_refcount = 0; + } + else if (n->dynamic_refcount) + { + n->dynamic_refcount--; + n->refcount--; + } - acc_unmap_data 
(h); + if (n->refcount == 0) + { + if (n->tgt->refcount == 2) + { + struct target_mem_desc *tp, *t; + for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL; + tp = t, t = t->prev) + if (n->tgt == t) + { + if (tp) + tp->prev = t->prev; + else + acc_dev->openacc.data_environ = t->prev; + break; + } + } - if (!acc_dev->free_func (acc_dev->target_id, d)) - gomp_fatal ("error in freeing device memory in %s", libfnname); + if (f & FLAG_COPYOUT) + acc_dev->dev2host_func (acc_dev->target_id, h, d, s); + + gomp_remove_var (acc_dev, n); + } + + gomp_mutex_unlock (&acc_dev->lock); } void @@ -598,12 +647,36 @@ acc_delete (void *h , size_t s) delete_copyout (0, h, s, __FUNCTION__); } +void +acc_delete_finalize (void *h , size_t s) +{ + delete_copyout (FLAG_FINALIZE, h, s, __FUNCTION__); +} + +void +acc_delete_finalize_async (void *h , size_t s, int async) +{ + delete_copyout (FLAG_FINALIZE, h, s, __FUNCTION__); +} + void acc_copyout (void *h, size_t s) { delete_copyout (FLAG_COPYOUT, h, s, __FUNCTION__); } +void +acc_copyout_finalize (void *h, size_t s) +{ + delete_copyout (FLAG_COPYOUT | FLAG_FINALIZE, h, s, __FUNCTION__); +} + +void +acc_copyout_finalize_async (void *h, size_t s, int async) +{ + delete_copyout (FLAG_COPYOUT | FLAG_FINALIZE, h, s, __FUNCTION__); +} + static void update_dev_host (int is_dev, void *h, size_t s) { @@ -659,11 +732,37 @@ gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes, struct goacc_thread *thr = goacc_thread (); struct gomp_device_descr *acc_dev = thr->dev; + if (acc_is_present (*hostaddrs, *sizes)) + { + splay_tree_key n; + gomp_mutex_lock (&acc_dev->lock); + n = lookup_host (acc_dev, *hostaddrs, *sizes); + gomp_mutex_unlock (&acc_dev->lock); + + tgt = n->tgt; + for (size_t i = 0; i < tgt->list_count; i++) + if (tgt->list[i].key == n) + { + for (size_t j = 0; j < mapnum; j++) + if (i + j < tgt->list_count && tgt->list[i + j].key) + { + tgt->list[i + j].key->refcount++; + tgt->list[i + j].key->dynamic_refcount++; + } 
+ return; + } + /* Should not reach here. */ + gomp_fatal ("Dynamic refcount incrementing failed for pointer/pset"); + } + gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__); tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true, GOMP_MAP_VARS_OPENACC); gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__); + /* Initialize dynamic refcount. */ + tgt->list[0].key->dynamic_refcount = 1; + gomp_mutex_lock (&acc_dev->lock); tgt->prev = acc_dev->openacc.data_environ; acc_dev->openacc.data_environ = tgt; @@ -671,7 +770,8 @@ gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes, } void -gomp_acc_remove_pointer (void *h, bool force_copyfrom, int async, int mapnum) +gomp_acc_remove_pointer (void *h, size_t s, bool force_copyfrom, int async, + int finalize, int mapnum) { struct goacc_thread *thr = goacc_thread (); struct gomp_device_descr *acc_dev = thr->dev; @@ -679,6 +779,9 @@ gomp_acc_remove_pointer (void *h, bool force_copyfrom, int async, int mapnum) struct target_mem_desc *t; int minrefs = (mapnum == 1) ? 2 : 3; + if (!acc_is_present (h, s)) + return; + gomp_mutex_lock (&acc_dev->lock); n = lookup_host (acc_dev, h, 1); @@ -693,40 +796,65 @@ gomp_acc_remove_pointer (void *h, bool force_copyfrom, int async, int mapnum) t = n->tgt; - struct target_mem_desc *tp; + if (n->refcount < n->dynamic_refcount) + { + gomp_mutex_unlock (&acc_dev->lock); + gomp_fatal ("Dynamic reference counting assert fail\n"); + } - if (t->refcount == minrefs) + if (finalize) { - /* This is the last reference, so pull the descriptor off the - chain. This avoids gomp_unmap_vars via gomp_unmap_tgt from - freeing the device memory. 
*/ - t->tgt_end = 0; - t->to_free = 0; + n->refcount -= n->dynamic_refcount; + n->dynamic_refcount = 0; + } + else if (n->dynamic_refcount) + { + n->dynamic_refcount--; + n->refcount--; + } - for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL; - tp = t, t = t->prev) + gomp_mutex_unlock (&acc_dev->lock); + + if (n->refcount == 0) + { + if (t->refcount == minrefs) { - if (n->tgt == t) + /* This is the last reference, so pull the descriptor off the + chain. This prevents gomp_unmap_vars via gomp_unmap_tgt from + freeing the device memory. */ + struct target_mem_desc *tp; + for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL; + tp = t, t = t->prev) { - if (tp) - tp->prev = t->prev; - else - acc_dev->openacc.data_environ = t->prev; - break; + if (n->tgt == t) + { + if (tp) + tp->prev = t->prev; + else + acc_dev->openacc.data_environ = t->prev; + break; + } } } - } - if (force_copyfrom) - t->list[0].copy_from = 1; + /* Set refcount to 1 to allow gomp_unmap_vars to unmap it. */ + n->refcount = 1; + t->refcount = minrefs; + for (size_t i = 0; i < t->list_count; i++) + if (t->list[i].key == n) + { + t->list[i].copy_from = force_copyfrom ? 1 : 0; + break; + } - gomp_mutex_unlock (&acc_dev->lock); + /* If running synchronously, unmap immediately. */ + if (async < acc_async_noval) + gomp_unmap_vars (t, true); + else + t->device_descr->openacc.register_async_cleanup_func (t, async); + } - /* If running synchronously, unmap immediately. */ - if (async_synchronous_p (async)) - gomp_unmap_vars (t, true); - else - t->device_descr->openacc.register_async_cleanup_func (t, async); + gomp_mutex_unlock (&acc_dev->lock); gomp_debug (0, " %s: mappings restored\n", __FUNCTION__); } diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c index 9eae43131f8..b80ace58590 100644 --- a/libgomp/oacc-parallel.c +++ b/libgomp/oacc-parallel.c @@ -38,15 +38,68 @@ #include <stdarg.h> #include <assert.h> +/* Returns the number of mappings associated with the pointer or pset. 
PSET + have three mappings, whereas pointer have two. */ + static int -find_pset (int pos, size_t mapnum, unsigned short *kinds) +find_pointer (int pos, size_t mapnum, unsigned short *kinds) { if (pos + 1 >= mapnum) return 0; unsigned char kind = kinds[pos+1] & 0xff; - return kind == GOMP_MAP_TO_PSET; + if (kind == GOMP_MAP_TO_PSET) + return 3; + else if (kind == GOMP_MAP_POINTER) + return 2; + + return 0; +} + +/* Handle the mapping pair that are presented when a + deviceptr clause is used with Fortran. */ + +static void +handle_ftn_pointers (size_t mapnum, void **hostaddrs, size_t *sizes, + unsigned short *kinds) +{ + int i; + + for (i = 0; i < mapnum; i++) + { + unsigned short kind1 = kinds[i] & 0xff; + + /* Handle Fortran deviceptr clause. */ + if (kind1 == GOMP_MAP_FORCE_DEVICEPTR) + { + unsigned short kind2; + + if (i < (signed)mapnum - 1) + kind2 = kinds[i + 1] & 0xff; + else + kind2 = 0xffff; + + if (sizes[i] == sizeof (void *)) + continue; + + /* At this point, we're dealing with a Fortran deviceptr. + If the next element is not what we're expecting, then + this is an instance of where the deviceptr variable was + not used within the region and the pointer was removed + by the gimplifier. */ + if (kind2 == GOMP_MAP_POINTER + && sizes[i + 1] == 0 + && hostaddrs[i] == *(void **)hostaddrs[i + 1]) + { + kinds[i+1] = kinds[i]; + sizes[i+1] = sizeof (void *); + } + + /* Invalidate the entry. */ + hostaddrs[i] = NULL; + } + } } static void goacc_wait (int async, int num_waits, va_list *ap); @@ -88,6 +141,8 @@ GOACC_parallel_keyed (int device, void (*fn) (void *), thr = goacc_thread (); acc_dev = thr->dev; + handle_ftn_pointers (mapnum, hostaddrs, sizes, kinds); + /* Host fallback if "if" clause is false or if the current device is set to the host. */ if (host_fallback) @@ -183,10 +238,29 @@ GOACC_parallel_keyed (int device, void (*fn) (void *), async, dims, tgt); /* If running synchronously, unmap immediately. 
*/ + bool copyfrom = true; if (async_synchronous_p (async)) gomp_unmap_vars (tgt, true); else - tgt->device_descr->openacc.register_async_cleanup_func (tgt, async); + { + bool async_unmap = false; + for (size_t i = 0; i < tgt->list_count; i++) + { + splay_tree_key k = tgt->list[i].key; + if (k && k->refcount == 1) + { + async_unmap = true; + break; + } + } + if (async_unmap) + tgt->device_descr->openacc.register_async_cleanup_func (tgt, async); + else + { + copyfrom = false; + gomp_unmap_vars (tgt, copyfrom); + } + } acc_dev->openacc.async_set_async_func (acc_async_sync); } @@ -286,6 +360,17 @@ GOACC_enter_exit_data (int device, size_t mapnum, va_end (ap); } + /* Determine whether "finalize" semantics apply to all mappings of this + OpenACC directive. */ + bool finalize = false; + if (mapnum > 0) + { + unsigned char kind = kinds[0] & 0xff; + if (kind == GOMP_MAP_DELETE + || kind == GOMP_MAP_FORCE_FROM) + finalize = true; + } + acc_dev->openacc.async_set_async_func (async); /* Determine if this is an "acc enter data". */ @@ -298,13 +383,17 @@ GOACC_enter_exit_data (int device, size_t mapnum, if (kind == GOMP_MAP_FORCE_ALLOC || kind == GOMP_MAP_FORCE_PRESENT - || kind == GOMP_MAP_FORCE_TO) + || kind == GOMP_MAP_FORCE_TO + || kind == GOMP_MAP_TO + || kind == GOMP_MAP_ALLOC) { data_enter = true; break; } - if (kind == GOMP_MAP_DELETE + if (kind == GOMP_MAP_RELEASE + || kind == GOMP_MAP_DELETE + || kind == GOMP_MAP_FROM || kind == GOMP_MAP_FORCE_FROM) break; @@ -312,31 +401,39 @@ GOACC_enter_exit_data (int device, size_t mapnum, kind); } + /* In c, non-pointers and arrays are represented by a single data clause. + Dynamically allocated arrays and subarrays are represented by a data + clause followed by an internal GOMP_MAP_POINTER. + + In fortran, scalars and not allocated arrays are represented by a + single data clause. Allocated arrays and subarrays have three mappings: + 1) the original data clause, 2) a PSET 3) a pointer to the array data. 
+ */ + if (data_enter) { for (i = 0; i < mapnum; i++) { unsigned char kind = kinds[i] & 0xff; - /* Scan for PSETs. */ - int psets = find_pset (i, mapnum, kinds); + /* Scan for pointers and PSETs. */ + int pointer = find_pointer (i, mapnum, kinds); - if (!psets) + if (!pointer) { switch (kind) { - case GOMP_MAP_POINTER: - gomp_acc_insert_pointer (1, &hostaddrs[i], &sizes[i], - &kinds[i]); + case GOMP_MAP_ALLOC: + acc_present_or_create (hostaddrs[i], sizes[i]); break; case GOMP_MAP_FORCE_ALLOC: acc_create (hostaddrs[i], sizes[i]); break; - case GOMP_MAP_FORCE_PRESENT: + case GOMP_MAP_TO: acc_present_or_copyin (hostaddrs[i], sizes[i]); break; case GOMP_MAP_FORCE_TO: - acc_present_or_copyin (hostaddrs[i], sizes[i]); + acc_copyin (hostaddrs[i], sizes[i]); break; default: gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x", @@ -346,12 +443,13 @@ GOACC_enter_exit_data (int device, size_t mapnum, } else { - gomp_acc_insert_pointer (3, &hostaddrs[i], &sizes[i], &kinds[i]); + gomp_acc_insert_pointer (pointer, &hostaddrs[i], + &sizes[i], &kinds[i]); /* Increment 'i' by two because OpenACC requires fortran arrays to be contiguous, so each PSET is associated with one of MAP_FORCE_ALLOC/MAP_FORCE_PRESET/MAP_FORCE_TO, and one MAP_POINTER. 
*/ - i += 2; + i += pointer - 1; } } } @@ -360,22 +458,28 @@ GOACC_enter_exit_data (int device, size_t mapnum, { unsigned char kind = kinds[i] & 0xff; - int psets = find_pset (i, mapnum, kinds); + int pointer = find_pointer (i, mapnum, kinds); - if (!psets) + if (!pointer) { switch (kind) { - case GOMP_MAP_POINTER: - gomp_acc_remove_pointer (hostaddrs[i], (kinds[i] & 0xff) - == GOMP_MAP_FORCE_FROM, - async, 1); - break; + case GOMP_MAP_RELEASE: case GOMP_MAP_DELETE: - acc_delete (hostaddrs[i], sizes[i]); + if (acc_is_present (hostaddrs[i], sizes[i])) + { + if (finalize) + acc_delete_finalize (hostaddrs[i], sizes[i]); + else + acc_delete (hostaddrs[i], sizes[i]); + } break; + case GOMP_MAP_FROM: case GOMP_MAP_FORCE_FROM: - acc_copyout (hostaddrs[i], sizes[i]); + if (finalize) + acc_copyout_finalize (hostaddrs[i], sizes[i]); + else + acc_copyout (hostaddrs[i], sizes[i]); break; default: gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x", @@ -385,10 +489,12 @@ GOACC_enter_exit_data (int device, size_t mapnum, } else { - gomp_acc_remove_pointer (hostaddrs[i], (kinds[i] & 0xff) - == GOMP_MAP_FORCE_FROM, async, 3); + bool copyfrom = (kind == GOMP_MAP_FORCE_FROM + || kind == GOMP_MAP_FROM); + gomp_acc_remove_pointer (hostaddrs[i], sizes[i], copyfrom, async, + finalize, pointer); /* See the above comment. */ - i += 2; + i += pointer - 1; } } @@ -447,6 +553,7 @@ GOACC_update (int device, size_t mapnum, acc_dev->openacc.async_set_async_func (async); + bool update_device = false; for (i = 0; i < mapnum; ++i) { unsigned char kind = kinds[i] & 0xff; @@ -457,11 +564,46 @@ GOACC_update (int device, size_t mapnum, case GOMP_MAP_TO_PSET: break; + case GOMP_MAP_ALWAYS_POINTER: + if (update_device) + { + /* Save the contents of the host pointer. */ + void *dptr = acc_deviceptr (hostaddrs[i-1]); + uintptr_t t = *(uintptr_t *) hostaddrs[i]; + + /* Update the contents of the host pointer to reflect + the value of the allocated device memory in the + previous pointer. 
*/ + *(uintptr_t *) hostaddrs[i] = (uintptr_t)dptr; + acc_update_device (hostaddrs[i], sizeof (uintptr_t)); + + /* Restore the host pointer. */ + *(uintptr_t *) hostaddrs[i] = t; + update_device = false; + } + break; + + case GOMP_MAP_TO: + if (!acc_is_present (hostaddrs[i], sizes[i])) + { + update_device = false; + break; + } + /* Fallthru */ case GOMP_MAP_FORCE_TO: + update_device = true; acc_update_device (hostaddrs[i], sizes[i]); break; + case GOMP_MAP_FROM: + if (!acc_is_present (hostaddrs[i], sizes[i])) + { + update_device = false; + break; + } + /* Fallthru */ case GOMP_MAP_FORCE_FROM: + update_device = false; acc_update_self (hostaddrs[i], sizes[i]); break; @@ -522,6 +664,7 @@ GOACC_declare (int device, size_t mapnum, case GOMP_MAP_FORCE_FROM: case GOMP_MAP_FORCE_TO: case GOMP_MAP_POINTER: + case GOMP_MAP_RELEASE: case GOMP_MAP_DELETE: GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i], &kinds[i], GOMP_ASYNC_SYNC, 0); @@ -543,7 +686,6 @@ GOACC_declare (int device, size_t mapnum, break; case GOMP_MAP_FROM: - kinds[i] = GOMP_MAP_FORCE_FROM; GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i], &kinds[i], GOMP_ASYNC_SYNC, 0); break; diff --git a/libgomp/openacc.f90 b/libgomp/openacc.f90 index d201d1dde6f..84a8700f072 100644 --- a/libgomp/openacc.f90 +++ b/libgomp/openacc.f90 @@ -222,6 +222,24 @@ module openacc_internal type (*), dimension (..), contiguous :: a end subroutine + subroutine acc_copyout_finalize_32_h (a, len) + use iso_c_binding, only: c_int32_t + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int32_t) len + end subroutine + + subroutine acc_copyout_finalize_64_h (a, len) + use iso_c_binding, only: c_int64_t + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int64_t) len + end subroutine + + subroutine acc_copyout_finalize_array_h (a) + type (*), dimension (..), contiguous :: a + end subroutine + subroutine acc_delete_32_h (a, len) use iso_c_binding, only: c_int32_t !GCC$ 
ATTRIBUTES NO_ARG_CHECK :: a @@ -240,6 +258,24 @@ module openacc_internal type (*), dimension (..), contiguous :: a end subroutine + subroutine acc_delete_finalize_32_h (a, len) + use iso_c_binding, only: c_int32_t + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int32_t) len + end subroutine + + subroutine acc_delete_finalize_64_h (a, len) + use iso_c_binding, only: c_int64_t + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int64_t) len + end subroutine + + subroutine acc_delete_finalize_array_h (a) + type (*), dimension (..), contiguous :: a + end subroutine + subroutine acc_update_device_32_h (a, len) use iso_c_binding, only: c_int32_t !GCC$ ATTRIBUTES NO_ARG_CHECK :: a @@ -426,6 +462,14 @@ module openacc_internal integer (c_size_t), value :: len end subroutine + subroutine acc_copyout_finalize_l (a, len) & + bind (C, name = "acc_copyout_finalize") + use iso_c_binding, only: c_size_t + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_size_t), value :: len + end subroutine + subroutine acc_delete_l (a, len) & bind (C, name = "acc_delete") use iso_c_binding, only: c_size_t @@ -434,6 +478,14 @@ module openacc_internal integer (c_size_t), value :: len end subroutine + subroutine acc_delete_finalize_l (a, len) & + bind (C, name = "acc_delete_finalize") + use iso_c_binding, only: c_size_t + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_size_t), value :: len + end subroutine + subroutine acc_update_device_l (a, len) & bind (C, name = "acc_update_device") use iso_c_binding, only: c_size_t @@ -598,12 +650,24 @@ module openacc procedure :: acc_copyout_array_h end interface + interface acc_copyout_finalize + procedure :: acc_copyout_finalize_32_h + procedure :: acc_copyout_finalize_64_h + procedure :: acc_copyout_finalize_array_h + end interface + interface acc_delete procedure :: acc_delete_32_h procedure :: acc_delete_64_h procedure :: 
acc_delete_array_h end interface + interface acc_delete_finalize + procedure :: acc_delete_finalize_32_h + procedure :: acc_delete_finalize_64_h + procedure :: acc_delete_finalize_array_h + end interface + interface acc_update_device procedure :: acc_update_device_32_h procedure :: acc_update_device_64_h @@ -860,6 +924,30 @@ subroutine acc_copyout_array_h (a) call acc_copyout_l (a, sizeof (a)) end subroutine +subroutine acc_copyout_finalize_32_h (a, len) + use iso_c_binding, only: c_int32_t, c_size_t + use openacc_internal, only: acc_copyout_finalize_l + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int32_t) len + call acc_copyout_finalize_l (a, int (len, kind = c_size_t)) +end subroutine + +subroutine acc_copyout_finalize_64_h (a, len) + use iso_c_binding, only: c_int64_t, c_size_t + use openacc_internal, only: acc_copyout_finalize_l + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int64_t) len + call acc_copyout_finalize_l (a, int (len, kind = c_size_t)) +end subroutine + +subroutine acc_copyout_finalize_array_h (a) + use openacc_internal, only: acc_copyout_finalize_l + type (*), dimension (..), contiguous :: a + call acc_copyout_finalize_l (a, sizeof (a)) +end subroutine + subroutine acc_delete_32_h (a, len) use iso_c_binding, only: c_int32_t, c_size_t use openacc_internal, only: acc_delete_l @@ -884,6 +972,30 @@ subroutine acc_delete_array_h (a) call acc_delete_l (a, sizeof (a)) end subroutine +subroutine acc_delete_finalize_32_h (a, len) + use iso_c_binding, only: c_int32_t, c_size_t + use openacc_internal, only: acc_delete_finalize_l + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int32_t) len + call acc_delete_finalize_l (a, int (len, kind = c_size_t)) +end subroutine + +subroutine acc_delete_finalize_64_h (a, len) + use iso_c_binding, only: c_int64_t, c_size_t + use openacc_internal, only: acc_delete_finalize_l + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), 
dimension (*) :: a + integer (c_int64_t) len + call acc_delete_finalize_l (a, int (len, kind = c_size_t)) +end subroutine + +subroutine acc_delete_finalize_array_h (a) + use openacc_internal, only: acc_delete_finalize_l + type (*), dimension (..), contiguous :: a + call acc_delete_finalize_l (a, sizeof (a)) +end subroutine + subroutine acc_update_device_32_h (a, len) use iso_c_binding, only: c_int32_t, c_size_t use openacc_internal, only: acc_update_device_l diff --git a/libgomp/openacc.h b/libgomp/openacc.h index b8572574f13..02a85a09ddb 100644 --- a/libgomp/openacc.h +++ b/libgomp/openacc.h @@ -109,6 +109,12 @@ int acc_is_present (void *, size_t) __GOACC_NOTHROW; void acc_memcpy_to_device (void *, void *, size_t) __GOACC_NOTHROW; void acc_memcpy_from_device (void *, void *, size_t) __GOACC_NOTHROW; +/* Finalize versions of copyout/delete functions, specified in OpenACC 2.5. */ +void acc_copyout_finalize (void *, size_t) __GOACC_NOTHROW; +void acc_copyout_finalize_async (void *, size_t, int) __GOACC_NOTHROW; +void acc_delete_finalize (void *, size_t) __GOACC_NOTHROW; +void acc_delete_finalize_async (void *, size_t, int) __GOACC_NOTHROW; + /* CUDA-specific routines. 
*/ void *acc_get_current_cuda_device (void) __GOACC_NOTHROW; void *acc_get_current_cuda_context (void) __GOACC_NOTHROW; diff --git a/libgomp/openacc_lib.h b/libgomp/openacc_lib.h index 5cf743c2491..737c582041d 100644 --- a/libgomp/openacc_lib.h +++ b/libgomp/openacc_lib.h @@ -273,6 +273,26 @@ end subroutine end interface + interface acc_copyout_finalize + subroutine acc_copyout_finalize_32_h (a, len) + use iso_c_binding, only: c_int32_t + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int32_t) len + end subroutine + + subroutine acc_copyout_finalize_64_h (a, len) + use iso_c_binding, only: c_int64_t + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int64_t) len + end subroutine + + subroutine acc_copyout_finalize_array_h (a) + type (*), dimension (..), contiguous :: a + end subroutine + end interface + interface acc_delete subroutine acc_delete_32_h (a, len) use iso_c_binding, only: c_int32_t @@ -293,6 +313,26 @@ end subroutine end interface + interface acc_delete_finalize + subroutine acc_delete_finalize_32_h (a, len) + use iso_c_binding, only: c_int32_t + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int32_t) len + end subroutine + + subroutine acc_delete_finalize_64_h (a, len) + use iso_c_binding, only: c_int64_t + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int64_t) len + end subroutine + + subroutine acc_delete_finalize_array_h (a) + type (*), dimension (..), contiguous :: a + end subroutine + end interface + interface acc_update_device subroutine acc_update_device_32_h (a, len) use iso_c_binding, only: c_int32_t diff --git a/libgomp/target.c b/libgomp/target.c index 509776d17a8..dda041cdbef 100644 --- a/libgomp/target.c +++ b/libgomp/target.c @@ -859,6 +859,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, tgt->list[i].offset = 0; tgt->list[i].length = k->host_end - k->host_start; k->refcount = 1; + 
k->dynamic_refcount = 0; tgt->refcount++; array->left = NULL; array->right = NULL; @@ -1011,6 +1012,23 @@ gomp_unmap_tgt (struct target_mem_desc *tgt) free (tgt); } +attribute_hidden bool +gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k) +{ + bool is_tgt_unmapped = false; + splay_tree_remove (&devicep->mem_map, k); + if (k->link_key) + splay_tree_insert (&devicep->mem_map, (splay_tree_node) k->link_key); + if (k->tgt->refcount > 1) + k->tgt->refcount--; + else + { + is_tgt_unmapped = true; + gomp_unmap_tgt (k->tgt); + } + return is_tgt_unmapped; +} + /* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant variables back from device to host: if it is false, it is assumed that this has been done already. */ @@ -1059,16 +1077,7 @@ gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom) + tgt->list[i].offset), tgt->list[i].length); if (do_unmap) - { - splay_tree_remove (&devicep->mem_map, k); - if (k->link_key) - splay_tree_insert (&devicep->mem_map, - (splay_tree_node) k->link_key); - if (k->tgt->refcount > 1) - k->tgt->refcount--; - else - gomp_unmap_tgt (k->tgt); - } + gomp_remove_var (devicep, k); } if (tgt->refcount > 1) @@ -1298,17 +1307,7 @@ gomp_unload_image_from_device (struct gomp_device_descr *devicep, else { splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k); - splay_tree_remove (&devicep->mem_map, n); - if (n->link_key) - { - if (n->tgt->refcount > 1) - n->tgt->refcount--; - else - { - is_tgt_unmapped = true; - gomp_unmap_tgt (n->tgt); - } - } + is_tgt_unmapped = gomp_remove_var (devicep, n); } } -- 2.17.1