Hi! Ping.
On Wed, 20 Apr 2016 13:35:28 +0200, I wrote: > On Mon, 28 Sep 2015 15:38:57 -0400, Nathan Sidwell <nat...@acm.org> wrote: > > On 09/24/15 04:40, Jakub Jelinek wrote: > > > Iff GCC 5 compiled offloaded OpenACC/PTX code will always do host fallback > > > anyway because of the incompatible PTX version > > I do agree that it's reasonable to require users to re-compile their code > when switching between major GCC releases, to retain the offloading > feature, or otherwise resort to host fallback execution. I'll propose > some text along these lines for the GCC 6 release notes. > > > > why don't you just > > > do > > > goacc_save_and_set_bind (acc_device_host); > > > fn (hostaddrs); > > > goacc_restore_bind (); > > > > Committed the attached. Thanks for the review. > > What we now got, doesn't work, for several reasons. GCC 5 OpenACC > offloading executables will just run into SIGSEGV. Here is a patch > (which depends on > <http://news.gmane.org/find-root.php?message_id=%3C87a8ko3ea0.fsf%40hertz.schwinge.homeip.net%3E>). > Unfortunately, we have to jump through some hoops: because GCC 5 > compiler-generated OpenACC reductions code emits calls to > acc_get_device_type, and because we'll (have to) always resort to host > fallback execution for GCC 5 executables, we also have to enforce these > acc_get_device_type calls to return acc_device_host; otherwise reductions > will give bogus results. (I hope I'm correctly implementing/using the > symbol versioning "magic".) OK for gcc-6-branch and trunk? Assuming we > want this fixed on gcc-6-branch, should it be part of 6.1 (to avoid 6.1 > users running into the SIGSEGV), or delay for 6.2? > > We don't have an easy way to add test cases to make sure we don't break > such legacy interfaces, do we? (So, I just manually checked a few test > cases.) > > commit c68c6b8e79176f5dc21684efe2517cbfb83a182e > Author: Thomas Schwinge <tho...@codesourcery.com> > Date: Wed Apr 20 13:08:57 2016 +0200 > > libgomp: Make GCC 5 OpenACC offloading executables work > > * libgomp.h: Include "openacc.h". > (goacc_get_device_type_201, goacc_get_device_type_20): New > prototypes. > (oacc_20_201_symver, goacc_get_device_type_201): New macros. > * libgomp.map: Add acc_get_device_type with OACC_2.0.1 symbol > version. > * oacc-init.c (acc_get_device_type): Rename to > goacc_get_device_type_201. > (goacc_get_device_type_20): New function. > * oacc-parallel.c (GOACC_parallel): Call goacc_lazy_initialize. > * plugin/plugin-nvptx.c (GOMP_OFFLOAD_load_image): Refuse version > 0 offload images. > * target.c (gomp_load_image_to_device): Gracefully handle the case > that a plugin refuses to load offload images. > --- > libgomp/libgomp.h | 10 ++++++++++ > libgomp/libgomp.map | 10 ++++++++++ > libgomp/oacc-init.c | 18 +++++++++++++++++- > libgomp/oacc-parallel.c | 11 +++++++++++ > libgomp/plugin/plugin-nvptx.c | 10 +++++++++- > libgomp/target.c | 6 +++++- > 6 files changed, 62 insertions(+), 3 deletions(-) > > diff --git libgomp/libgomp.h libgomp/libgomp.h > index 6a05bbc..9fa1cb1 100644 > --- libgomp/libgomp.h > +++ libgomp/libgomp.h > @@ -1011,6 +1011,8 @@ gomp_work_share_init_done (void) > /* Now that we're back to default visibility, include the globals. */ > #include "libgomp_g.h" > > +#include "openacc.h" > + > /* Include omp.h by parts. */ > #include "omp-lock.h" > #define _LIBGOMP_OMP_LOCK_DEFINED 1 > @@ -1047,11 +1049,17 @@ extern void gomp_set_nest_lock_25 (omp_nest_lock_25_t > *) __GOMP_NOTHROW; > extern void gomp_unset_nest_lock_25 (omp_nest_lock_25_t *) __GOMP_NOTHROW; > extern int gomp_test_nest_lock_25 (omp_nest_lock_25_t *) __GOMP_NOTHROW; > > +extern acc_device_t goacc_get_device_type_201 (void) __GOACC_NOTHROW; > +extern acc_device_t goacc_get_device_type_20 (void) __GOACC_NOTHROW; > + > # define strong_alias(fn, al) \ > extern __typeof (fn) al __attribute__ ((alias (#fn))); > # define omp_lock_symver(fn) \ > __asm (".symver g" #fn "_30, " #fn "@@OMP_3.0"); \ > __asm (".symver g" #fn "_25, " #fn "@OMP_1.0"); > +# define oacc_20_201_symver(fn) \ > + __asm (".symver go" #fn "_201, " #fn "@@OACC_2.0.1"); \ > + __asm (".symver go" #fn "_20, " #fn "@OACC_2.0"); > #else > # define gomp_init_lock_30 omp_init_lock > # define gomp_destroy_lock_30 omp_destroy_lock > @@ -1063,6 +1071,8 @@ extern int gomp_test_nest_lock_25 (omp_nest_lock_25_t > *) __GOMP_NOTHROW; > # define gomp_set_nest_lock_30 omp_set_nest_lock > # define gomp_unset_nest_lock_30 omp_unset_nest_lock > # define gomp_test_nest_lock_30 omp_test_nest_lock > + > +# define goacc_get_device_type_201 acc_get_device_type > #endif > > #ifdef HAVE_ATTRIBUTE_VISIBILITY > diff --git libgomp/libgomp.map libgomp/libgomp.map > index 4d42c42..4803aab 100644 > --- libgomp/libgomp.map > +++ libgomp/libgomp.map > @@ -304,7 +304,12 @@ OACC_2.0 { > acc_get_num_devices_h_; > acc_set_device_type; > acc_set_device_type_h_; > +#ifdef HAVE_SYMVER_SYMBOL_RENAMING_RUNTIME_SUPPORT > + # If the assembler used lacks the .symver directive or the linker > + # doesn't support GNU symbol versioning, we have the same symbol in > + # two versions, which Sun ld chokes on. > acc_get_device_type; > +#endif > acc_get_device_type_h_; > acc_set_device_num; > acc_set_device_num_h_; > @@ -378,6 +383,11 @@ OACC_2.0 { > acc_set_cuda_stream; > }; > > +OACC_2.0.1 { > + global: > + acc_get_device_type; > +} OACC_2.0; > + > GOACC_2.0 { > global: > GOACC_data_end; > diff --git libgomp/oacc-init.c libgomp/oacc-init.c > index 42d005d..a7a2243 100644 > --- libgomp/oacc-init.c > +++ libgomp/oacc-init.c > @@ -528,7 +528,7 @@ acc_set_device_type (acc_device_t d) > ialias (acc_set_device_type) > > acc_device_t > -acc_get_device_type (void) > +goacc_get_device_type_201 (void) > { > acc_device_t res = acc_device_none; > struct gomp_device_descr *dev; > @@ -552,8 +552,24 @@ acc_get_device_type (void) > return res; > } > > +#ifdef LIBGOMP_GNU_SYMBOL_VERSIONING > + > +/* Legacy entry point (GCC 5). Only provide host fallback execution. */ > + > +acc_device_t > +goacc_get_device_type_20 (void) > +{ > + return acc_device_host; > +} > + > +oacc_20_201_symver (acc_get_device_type) > + > +#else /* LIBGOMP_GNU_SYMBOL_VERSIONING */ > + > ialias (acc_get_device_type) > > +#endif /* LIBGOMP_GNU_SYMBOL_VERSIONING */ > + > int > acc_get_device_num (acc_device_t d) > { > diff --git libgomp/oacc-parallel.c libgomp/oacc-parallel.c > index 9fe5020..321fd66 100644 > --- libgomp/oacc-parallel.c > +++ libgomp/oacc-parallel.c > @@ -203,6 +203,17 @@ GOACC_parallel (int device, void (*fn) (void *), > int num_gangs, int num_workers, int vector_length, > int async, int num_waits, ...) > { > +#ifdef HAVE_INTTYPES_H > + gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, sizes=%p, kinds=%p, " > + "async = %d\n", > + __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds, async); > +#else > + gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p, > async=%d\n", > + __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds, > + async); > +#endif > + goacc_lazy_initialize (); > + > goacc_save_and_set_bind (acc_device_host); > fn (hostaddrs); > goacc_restore_bind (); > diff --git libgomp/plugin/plugin-nvptx.c libgomp/plugin/plugin-nvptx.c > index fc5f298..56e6fae 100644 > --- libgomp/plugin/plugin-nvptx.c > +++ libgomp/plugin/plugin-nvptx.c > @@ -1537,7 +1537,15 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, > const void *target_data, > GOMP_PLUGIN_fatal ("Offload data incompatible with PTX plugin" > " (expected %u, received %u)", > GOMP_VERSION_NVIDIA_PTX, GOMP_VERSION_DEV (version)); > - > + if (GOMP_VERSION_DEV (version) == 0) > + { > + /* We're no longer support offload data generated by version 0 > mkoffload; > + it won't be used in the legacy GOMP_parallel entry point. */ > + GOMP_PLUGIN_debug (0, "Offload data not loaded (version %u)\n", > + GOMP_VERSION_DEV (version)); > + return -1; > + } > + > GOMP_OFFLOAD_init_device (ord); > > dev = ptx_devices[ord]; > diff --git libgomp/target.c libgomp/target.c > index dd6f74d..2fbfa6e 100644 > --- libgomp/target.c > +++ libgomp/target.c > @@ -1008,7 +1008,11 @@ gomp_load_image_to_device (struct gomp_device_descr > *devicep, unsigned version, > num_target_entries > = devicep->load_image_func (devicep->target_id, version, > target_data, &target_table); > - > + if (num_target_entries < 0) > + { > + /* The plugin refused this offload data. */ > + return; > + } > if (num_target_entries != num_funcs + num_vars) > { > gomp_mutex_unlock (&devicep->lock); Grüße Thomas