Hi! On Tue, 16 May 2017 20:55:46 +0800, Chung-Lin Tang <chunglin_t...@mentor.com> wrote: > finalize clause of the exit data directive, and the > corresponding API routines.
> --- libgomp/oacc-parallel.c (revision 248095) > +++ libgomp/oacc-parallel.c (revision 248096) > @@ -355,7 +355,22 @@ > } > } > else > - tgt->device_descr->openacc.register_async_cleanup_func (tgt, async); > + { > + bool async_unmap = false; > + for (size_t i = 0; i < tgt->list_count; i++) > + { > + splay_tree_key k = tgt->list[i].key; > + if (k && k->refcount == 1) > + { > + async_unmap = true; > + break; > + } > + } > + if (async_unmap) > + tgt->device_descr->openacc.register_async_cleanup_func (tgt, async); > + else > + gomp_unmap_vars (tgt, false); > + } > > acc_dev->openacc.async_set_async_func (acc_async_sync); This additional gomp_unmap_vars call also needs to be instrumented for the OpenACC Profiling Interface. > --- libgomp/openacc.h (revision 248095) > +++ libgomp/openacc.h (revision 248096) > +/* Finalize versions of copyout/delete functions, specified in OpenACC 2.5. > */ > +void acc_copyout_finalize (void *, size_t) __GOACC_NOTHROW; > +void acc_copyout_finalize_async (void *, size_t, int) __GOACC_NOTHROW; > +void acc_delete_finalize (void *, size_t) __GOACC_NOTHROW; > +void acc_delete_finalize_async (void *, size_t, int) __GOACC_NOTHROW; And for these, the OpenACC Profiling Interface status needs to be documented. Committed to gomp-4_0-branch in r248150: commit dc97f798ad7f7f44f45b2b8e0ece81f3926fa1c2 Author: tschwinge <tschwinge@138bc75d-0d04-0410-961f-82ee72b054a4> Date: Wed May 17 12:07:30 2017 +0000 OpenACC 2.5 Profiling Interface changes for "finalize" handling libgomp/ * libgomp.texi (OpenACC Profiling Interface): Update. * oacc-parallel.c (GOACC_parallel_keyed): Update profiling event generation. 
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gomp-4_0-branch@248150 138bc75d-0d04-0410-961f-82ee72b054a4 --- libgomp/ChangeLog.gomp | 4 ++++ libgomp/libgomp.texi | 4 ++-- libgomp/oacc-parallel.c | 9 +++++++-- 3 files changed, 13 insertions(+), 4 deletions(-) diff --git libgomp/ChangeLog.gomp libgomp/ChangeLog.gomp index 2ea7215..996c1f9 100644 --- libgomp/ChangeLog.gomp +++ libgomp/ChangeLog.gomp @@ -1,5 +1,9 @@ 2017-05-17 Thomas Schwinge <tho...@codesourcery.com> + * libgomp.texi (OpenACC Profiling Interface): Update. + * oacc-parallel.c (GOACC_parallel_keyed): Update profiling event + generation. + * oacc-parallel.c (GOACC_enter_exit_data): Locally compute "finalize", and remove the formal parameter. Adjust all users. (GOACC_declare): Don't replace GOMP_MAP_FROM with diff --git libgomp/libgomp.texi libgomp/libgomp.texi index 69fb3be..1dea1e2 100644 --- libgomp/libgomp.texi +++ libgomp/libgomp.texi @@ -3459,8 +3459,8 @@ offloading devices (it's not clear if they should be): @item @code{acc_free} @item @code{acc_copyin}, @code{acc_present_or_copyin}, @code{acc_copyin_async} @item @code{acc_create}, @code{acc_present_or_create}, @code{acc_create_async} -@item @code{acc_copyout}, @code{acc_copyout_async} -@item @code{acc_delete}, @code{acc_delete_async} +@item @code{acc_copyout}, @code{acc_copyout_async}, @code{acc_copyout_finalize}, @code{acc_copyout_finalize_async} +@item @code{acc_delete}, @code{acc_delete_async}, @code{acc_delete_finalize}, @code{acc_delete_finalize_async} @item @code{acc_update_device}, @code{acc_update_device_async} @item @code{acc_update_self}, @code{acc_update_self_async} @item @code{acc_map_data}, @code{acc_unmap_data} diff --git libgomp/oacc-parallel.c libgomp/oacc-parallel.c index ff6e96c..622c711 100644 --- libgomp/oacc-parallel.c +++ libgomp/oacc-parallel.c @@ -333,8 +333,10 @@ GOACC_parallel_keyed (int device, void (*fn) (void *), async, dims, tgt); /* If running synchronously, unmap immediately. 
*/ + bool copyfrom = true; if (async < acc_async_noval) { + unmap: if (profiling_dispatch_p) { prof_info.event_type = acc_ev_exit_data_start; @@ -344,7 +346,7 @@ GOACC_parallel_keyed (int device, void (*fn) (void *), goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, &api_info); } - gomp_unmap_vars (tgt, true); + gomp_unmap_vars (tgt, copyfrom); if (profiling_dispatch_p) { prof_info.event_type = acc_ev_exit_data_end; @@ -369,7 +371,10 @@ GOACC_parallel_keyed (int device, void (*fn) (void *), if (async_unmap) tgt->device_descr->openacc.register_async_cleanup_func (tgt, async); else - gomp_unmap_vars (tgt, false); + { + copyfrom = false; + goto unmap; + } } acc_dev->openacc.async_set_async_func (acc_async_sync); (That one can certainly do with some restructuring, to avoid the "goto".) ;-) Grüße Thomas