On 07/31/15 08:16, Nathan Sidwell wrote:
On 07/24/15 15:26, Nathan Sidwell wrote:
Jakub,
this version makes the following changes to the earlier version.
*) Renames things to FOO_ver, rather than FOO_2
*) No attempt to deal with cross-version plugins and libgomp.
*) Adds GOMP_OFFLOAD_version function to plugin. (I went with your approach).
Returns the GOMP_VERSION used to build the plugin, which libgomp checks matches
the value for its build. When we make incompatible changes to the plugin
interface, that value can be incremented.
*) While working on gomp_load_plugin_for_device, I noticed the DLSYM and
DLSYM_OPT macros were somewhat funky. We're loading functions, so don't expect
a NULL value. We can simply check the returned value and only need dlerror when
we get NULL. The counting that DLSYM_OPT does was somewhat funky too. IMHO
better for that macro to simply return a truth value.
ok for trunk?
ping?
Hm, apparently the original update didn't make it to the mailing list and is
missing as a reply to:
https://gcc.gnu.org/ml/gcc-patches/2015-07/msg02085.html
attaching patch here.
nathan
2015-07-24 Nathan Sidwell <nat...@codesourcery.com>
include/
* gomp-constants.h (GOMP_VERSION, GOMP_VERSION_NVIDIA_PTX): New.
(GOMP_VERSION_PACK, GOMP_VERSION_LIB, GOMP_VERSION_DEV): New.
gcc/
* config/nvptx/mkoffload.c (process): Replace
GOMP_offload_{,un}register with GOMP_offload_{,un}register_ver.
libgomp/
* libgomp.map: Add 4.0.2 version.
* target.c (offload_image_descr): Add version field.
(gomp_load_image_to_device): Add version argument. Forward to
versioning loader if available. Improve load mismatch diagnostic.
(gomp_unload_image_from_device): Add version argument. Forward to
versioning unloader if available.
(GOMP_offload_register): Make stub function, move bulk to ...
(GOMP_offload_register_ver): ... here. Process version argument.
(GOMP_offload_unregister): Make stub function, move bulk to ...
(GOMP_offload_unregister_ver): ... here. Process version argument.
(gomp_init_device): Process version field.
(gomp_unload_device): Process version field.
(gomp_load_plugin_for_device): Reimplement DLSYM & DLSYM_OPT
macros. Look for versioning function and check it. Fetch
versioning loader and unloader if available.
* libgomp.h (gomp_device_descr): Add version function field. Put
loader and unloader fields in unions.
* oacc-host.c (host_dispatch): Adjust.
* plugin/plugin-nvptx.c: Include gomp-constants.h.
(GOMP_OFFLOAD_version): New.
(GOMP_OFFLOAD_load_image): Replace with ...
(GOMP_OFFLOAD_load_image_ver): ... this. Add version argument and
check it.
(GOMP_OFFLOAD_unload_image): Replace with ....
(GOMP_OFFLOAD_unload_image_ver): ... this. Add version argument and
check it.
Index: include/gomp-constants.h
===================================================================
--- include/gomp-constants.h (revision 226039)
+++ include/gomp-constants.h (working copy)
@@ -113,4 +113,12 @@ enum gomp_map_kind
#define GOMP_DEVICE_ICV -1
#define GOMP_DEVICE_HOST_FALLBACK -2
+/* Versions of libgomp and device-specific plugins. */
+#define GOMP_VERSION 0
+#define GOMP_VERSION_NVIDIA_PTX 0
+
+#define GOMP_VERSION_PACK(LIB, DEV) (((LIB) << 16) | (DEV))
+#define GOMP_VERSION_LIB(PACK) (((PACK) >> 16) & 0xffff)
+#define GOMP_VERSION_DEV(PACK) ((PACK) & 0xffff)
+
#endif
Index: gcc/config/nvptx/mkoffload.c
===================================================================
--- gcc/config/nvptx/mkoffload.c (revision 226039)
+++ gcc/config/nvptx/mkoffload.c (working copy)
@@ -881,10 +881,10 @@ process (FILE *in, FILE *out)
"extern \"C\" {\n"
"#endif\n");
- fprintf (out, "extern void GOMP_offload_register"
- " (const void *, int, const void *);\n");
- fprintf (out, "extern void GOMP_offload_unregister"
- " (const void *, int, const void *);\n");
+ fprintf (out, "extern void GOMP_offload_register_ver"
+ " (unsigned, const void *, int, const void *);\n");
+ fprintf (out, "extern void GOMP_offload_unregister_ver"
+ " (unsigned, const void *, int, const void *);\n");
fprintf (out, "#ifdef __cplusplus\n"
"}\n"
@@ -894,15 +894,19 @@ process (FILE *in, FILE *out)
fprintf (out, "static __attribute__((constructor)) void init (void)\n"
"{\n"
- " GOMP_offload_register (__OFFLOAD_TABLE__, %d/*NVIDIA_PTX*/,\n"
- " &target_data);\n"
- "};\n", GOMP_DEVICE_NVIDIA_PTX);
+ " GOMP_offload_register_ver (%#x, __OFFLOAD_TABLE__,"
+ "%d/*NVIDIA_PTX*/, &target_data);\n"
+ "};\n",
+ GOMP_VERSION_PACK (GOMP_VERSION, GOMP_VERSION_NVIDIA_PTX),
+ GOMP_DEVICE_NVIDIA_PTX);
fprintf (out, "static __attribute__((destructor)) void fini (void)\n"
"{\n"
- " GOMP_offload_unregister (__OFFLOAD_TABLE__, %d/*NVIDIA_PTX*/,\n"
- " &target_data);\n"
- "};\n", GOMP_DEVICE_NVIDIA_PTX);
+ " GOMP_offload_unregister_ver (%#x, __OFFLOAD_TABLE__,"
+ "%d/*NVIDIA_PTX*/, &target_data);\n"
+ "};\n",
+ GOMP_VERSION_PACK (GOMP_VERSION, GOMP_VERSION_NVIDIA_PTX),
+ GOMP_DEVICE_NVIDIA_PTX);
}
static void
Index: libgomp/libgomp.map
===================================================================
--- libgomp/libgomp.map (revision 226039)
+++ libgomp/libgomp.map (working copy)
@@ -234,6 +234,12 @@ GOMP_4.0.1 {
GOMP_offload_unregister;
} GOMP_4.0;
+GOMP_4.0.2 {
+ global:
+ GOMP_offload_register_ver;
+ GOMP_offload_unregister_ver;
+} GOMP_4.0.1;
+
OACC_2.0 {
global:
acc_get_num_devices;
Index: libgomp/target.c
===================================================================
--- libgomp/target.c (revision 226039)
+++ libgomp/target.c (working copy)
@@ -56,6 +56,7 @@ static gomp_mutex_t register_lock;
It contains type of the target device, pointer to host table descriptor, and
pointer to target data. */
struct offload_image_descr {
+ unsigned version;
enum offload_target_type type;
const void *host_table;
const void *target_data;
@@ -642,7 +643,8 @@ gomp_update (struct gomp_device_descr *d
emitting variable and functions in the same order. */
static void
-gomp_load_image_to_device (struct gomp_device_descr *devicep,
+gomp_load_image_to_device (unsigned version,
+ struct gomp_device_descr *devicep,
const void *host_table, const void *target_data,
bool is_register_lock)
{
@@ -658,16 +660,28 @@ gomp_load_image_to_device (struct gomp_d
/* Load image to device and get target addresses for the image. */
struct addr_pair *target_table = NULL;
- int i, num_target_entries
- = devicep->load_image_func (devicep->target_id, target_data,
- &target_table);
+ int i, num_target_entries;
+
+ if (devicep->version_func)
+ num_target_entries
+ = devicep->load_image.ver_func (version, devicep->target_id,
+ target_data, &target_table);
+ else if (GOMP_VERSION_DEV (version))
+ gomp_fatal ("Plugin too old for offload data (0 < %u)",
+ GOMP_VERSION_DEV (version));
+ else
+ num_target_entries
+ = devicep->load_image.unver_func (devicep->target_id,
+ target_data, &target_table);
if (num_target_entries != num_funcs + num_vars)
{
gomp_mutex_unlock (&devicep->lock);
if (is_register_lock)
gomp_mutex_unlock (&register_lock);
- gomp_fatal ("Can't map target functions or variables");
+ gomp_fatal ("Cannot map target functions or variables"
+ " (expected %u, have %u)", num_funcs + num_vars,
+ num_target_entries);
}
/* Insert host-target address mapping into splay tree. */
@@ -731,7 +745,8 @@ gomp_load_image_to_device (struct gomp_d
The device must be locked. */
static void
-gomp_unload_image_from_device (struct gomp_device_descr *devicep,
+gomp_unload_image_from_device (unsigned version,
+ struct gomp_device_descr *devicep,
const void *host_table, const void *target_data)
{
void **host_func_table = ((void ***) host_table)[0];
@@ -756,8 +771,12 @@ gomp_unload_image_from_device (struct go
k.host_end = k.host_start + 1;
node = splay_tree_lookup (&devicep->mem_map, &k);
}
-
- devicep->unload_image_func (devicep->target_id, target_data);
+
+ if (devicep->version_func)
+ devicep->unload_image.ver_func (version,
+ devicep->target_id, target_data);
+ else
+ devicep->unload_image.unver_func (devicep->target_id, target_data);
/* Remove mappings from splay tree. */
for (j = 0; j < num_funcs; j++)
@@ -786,10 +805,15 @@ gomp_unload_image_from_device (struct go
the target, and TARGET_DATA needed by target plugin. */
void
-GOMP_offload_register (const void *host_table, int target_type,
- const void *target_data)
+GOMP_offload_register_ver (unsigned version, const void *host_table,
+ int target_type, const void *target_data)
{
int i;
+
+ if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
+ gomp_fatal ("Library too old for offload (version %u < %u)",
+ GOMP_VERSION, GOMP_VERSION_LIB (version));
+
gomp_mutex_lock (&register_lock);
/* Load image to all initialized devices. */
@@ -798,7 +822,8 @@ GOMP_offload_register (const void *host_
struct gomp_device_descr *devicep = &devices[i];
gomp_mutex_lock (&devicep->lock);
if (devicep->type == target_type && devicep->is_initialized)
- gomp_load_image_to_device (devicep, host_table, target_data, true);
+ gomp_load_image_to_device (version, devicep,
+ host_table, target_data, true);
gomp_mutex_unlock (&devicep->lock);
}
@@ -807,6 +832,7 @@ GOMP_offload_register (const void *host_
= gomp_realloc_unlock (offload_images,
(num_offload_images + 1)
* sizeof (struct offload_image_descr));
+ offload_images[num_offload_images].version = version;
offload_images[num_offload_images].type = target_type;
offload_images[num_offload_images].host_table = host_table;
offload_images[num_offload_images].target_data = target_data;
@@ -815,13 +841,20 @@ GOMP_offload_register (const void *host_
gomp_mutex_unlock (&register_lock);
}
+void
+GOMP_offload_register (const void *host_table, int target_type,
+ const void *target_data)
+{
+ GOMP_offload_register_ver (0, host_table, target_type, target_data);
+}
+
/* This function should be called from every offload image while unloading.
It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
the target, and TARGET_DATA needed by target plugin. */
void
-GOMP_offload_unregister (const void *host_table, int target_type,
- const void *target_data)
+GOMP_offload_unregister_ver (unsigned version, const void *host_table,
+ int target_type, const void *target_data)
{
int i;
@@ -833,7 +866,8 @@ GOMP_offload_unregister (const void *hos
struct gomp_device_descr *devicep = &devices[i];
gomp_mutex_lock (&devicep->lock);
if (devicep->type == target_type && devicep->is_initialized)
- gomp_unload_image_from_device (devicep, host_table, target_data);
+ gomp_unload_image_from_device (version, devicep,
+ host_table, target_data);
gomp_mutex_unlock (&devicep->lock);
}
@@ -848,6 +882,13 @@ GOMP_offload_unregister (const void *hos
gomp_mutex_unlock (&register_lock);
}
+void
+GOMP_offload_unregister (const void *host_table, int target_type,
+ const void *target_data)
+{
+ GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
+}
+
/* This function initializes the target device, specified by DEVICEP. DEVICEP
must be locked on entry, and remains locked on return. */
@@ -862,8 +903,9 @@ gomp_init_device (struct gomp_device_des
{
struct offload_image_descr *image = &offload_images[i];
if (image->type == devicep->type)
- gomp_load_image_to_device (devicep, image->host_table,
- image->target_data, false);
+ gomp_load_image_to_device (image->version, devicep,
+ image->host_table, image->target_data,
+ false);
}
devicep->is_initialized = true;
@@ -881,7 +923,8 @@ gomp_unload_device (struct gomp_device_d
{
struct offload_image_descr *image = &offload_images[i];
if (image->type == devicep->type)
- gomp_unload_image_from_device (devicep, image->host_table,
+ gomp_unload_image_from_device (image->version, devicep,
+ image->host_table,
image->target_data);
}
}
@@ -1085,43 +1128,41 @@ gomp_load_plugin_for_device (struct gomp
const char *plugin_name)
{
const char *err = NULL, *last_missing = NULL;
- int optional_present, optional_total;
-
- /* Clear any existing error. */
- dlerror ();
void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
if (!plugin_handle)
- {
- err = dlerror ();
- goto out;
- }
+ goto dl_fail;
/* Check if all required functions are available in the plugin and store
- their handlers. */
+ their handlers. None of the symbols can legitimately be NULL,
+ so we don't need to check dlerror all the time. */
#define DLSYM(f) \
- do \
- { \
- device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f); \
- err = dlerror (); \
- if (err != NULL) \
- goto out; \
- } \
- while (0)
- /* Similar, but missing functions are not an error. */
-#define DLSYM_OPT(f, n) \
- do \
- { \
- const char *tmp_err; \
- device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n); \
- tmp_err = dlerror (); \
- if (tmp_err == NULL) \
- optional_present++; \
- else \
- last_missing = #n; \
- optional_total++; \
- } \
- while (0)
+ if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \
+ goto dl_fail
+ /* Similar, but missing functions are not an error. Return false if
+ failed, true otherwise. */
+#define DLSYM_OPT(f, n) \
+ ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \
+ || (last_missing = #n, 0))
+
+ if (DLSYM_OPT (version, version))
+ {
+ unsigned v = device->version_func ();
+ if (v != GOMP_VERSION)
+ {
+ err = "plugin version mismatch";
+ goto fail;
+ }
+ if (!DLSYM_OPT (load_image.ver, load_image_ver)
+ || !DLSYM_OPT (unload_image.ver, unload_image_ver))
+ goto dl_fail;
+ }
+ else
+ {
+ if (!DLSYM_OPT (load_image.unver, load_image)
+ || !DLSYM_OPT (unload_image.unver, unload_image))
+ goto dl_fail;
+ }
DLSYM (get_name);
DLSYM (get_caps);
@@ -1129,8 +1170,6 @@ gomp_load_plugin_for_device (struct gomp
DLSYM (get_num_devices);
DLSYM (init_device);
DLSYM (fini_device);
- DLSYM (load_image);
- DLSYM (unload_image);
DLSYM (alloc);
DLSYM (free);
DLSYM (dev2host);
@@ -1140,53 +1179,57 @@ gomp_load_plugin_for_device (struct gomp
DLSYM (run);
if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
{
- optional_present = optional_total = 0;
- DLSYM_OPT (openacc.exec, openacc_parallel);
- DLSYM_OPT (openacc.register_async_cleanup,
- openacc_register_async_cleanup);
- DLSYM_OPT (openacc.async_test, openacc_async_test);
- DLSYM_OPT (openacc.async_test_all, openacc_async_test_all);
- DLSYM_OPT (openacc.async_wait, openacc_async_wait);
- DLSYM_OPT (openacc.async_wait_async, openacc_async_wait_async);
- DLSYM_OPT (openacc.async_wait_all, openacc_async_wait_all);
- DLSYM_OPT (openacc.async_wait_all_async, openacc_async_wait_all_async);
- DLSYM_OPT (openacc.async_set_async, openacc_async_set_async);
- DLSYM_OPT (openacc.create_thread_data, openacc_create_thread_data);
- DLSYM_OPT (openacc.destroy_thread_data, openacc_destroy_thread_data);
- /* Require all the OpenACC handlers if we have
- GOMP_OFFLOAD_CAP_OPENACC_200. */
- if (optional_present != optional_total)
+ if (!DLSYM_OPT (openacc.exec, openacc_parallel)
+ || !DLSYM_OPT (openacc.register_async_cleanup,
+ openacc_register_async_cleanup)
+ || !DLSYM_OPT (openacc.async_test, openacc_async_test)
+ || !DLSYM_OPT (openacc.async_test_all, openacc_async_test_all)
+ || !DLSYM_OPT (openacc.async_wait, openacc_async_wait)
+ || !DLSYM_OPT (openacc.async_wait_async, openacc_async_wait_async)
+ || !DLSYM_OPT (openacc.async_wait_all, openacc_async_wait_all)
+ || !DLSYM_OPT (openacc.async_wait_all_async,
+ openacc_async_wait_all_async)
+ || !DLSYM_OPT (openacc.async_set_async, openacc_async_set_async)
+ || !DLSYM_OPT (openacc.create_thread_data,
+ openacc_create_thread_data)
+ || !DLSYM_OPT (openacc.destroy_thread_data,
+ openacc_destroy_thread_data))
{
+ /* Require all the OpenACC handlers if we have
+ GOMP_OFFLOAD_CAP_OPENACC_200. */
err = "plugin missing OpenACC handler function";
- goto out;
+ goto fail;
}
- optional_present = optional_total = 0;
- DLSYM_OPT (openacc.cuda.get_current_device,
- openacc_get_current_cuda_device);
- DLSYM_OPT (openacc.cuda.get_current_context,
- openacc_get_current_cuda_context);
- DLSYM_OPT (openacc.cuda.get_stream, openacc_get_cuda_stream);
- DLSYM_OPT (openacc.cuda.set_stream, openacc_set_cuda_stream);
- /* Make sure all the CUDA functions are there if any of them are. */
- if (optional_present && optional_present != optional_total)
+
+ unsigned cuda = 0;
+ cuda += DLSYM_OPT (openacc.cuda.get_current_device,
+ openacc_get_current_cuda_device);
+ cuda += DLSYM_OPT (openacc.cuda.get_current_context,
+ openacc_get_current_cuda_context);
+ cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_get_cuda_stream);
+ cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_set_cuda_stream);
+ if (cuda && cuda != 4)
{
+ /* Make sure all the CUDA functions are there if any of them are. */
err = "plugin missing OpenACC CUDA handler function";
- goto out;
+ goto fail;
}
}
#undef DLSYM
#undef DLSYM_OPT
- out:
- if (err != NULL)
- {
- gomp_error ("while loading %s: %s", plugin_name, err);
- if (last_missing)
- gomp_error ("missing function was %s", last_missing);
- if (plugin_handle)
- dlclose (plugin_handle);
- }
- return err == NULL;
+ return 1;
+
+ dl_fail:
+ err = dlerror ();
+ fail:
+ gomp_error ("while loading %s: %s", plugin_name, err);
+ if (last_missing)
+ gomp_error ("missing function was %s", last_missing);
+ if (plugin_handle)
+ dlclose (plugin_handle);
+
+ return 0;
}
/* This function initializes the runtime needed for offloading.
Index: libgomp/libgomp.h
===================================================================
--- libgomp/libgomp.h (revision 226039)
+++ libgomp/libgomp.h (working copy)
@@ -748,8 +748,22 @@ struct gomp_device_descr
int (*get_num_devices_func) (void);
void (*init_device_func) (int);
void (*fini_device_func) (int);
- int (*load_image_func) (int, const void *, struct addr_pair **);
- void (*unload_image_func) (int, const void *);
+
+ unsigned (*version_func) (void);
+
+ /* When all plugins updated, we can remove these unions and just
+ have the versioned entry points. */
+ union
+ {
+ int (*unver_func) (int, const void *, struct addr_pair **);
+ int (*ver_func) (unsigned, int, const void *, struct addr_pair **);
+ } load_image;
+ union
+ {
+ void (*unver_func) (int, const void *);
+ void (*ver_func) (unsigned, int, const void *);
+ } unload_image;
+
void *(*alloc_func) (int, size_t);
void (*free_func) (int, void *);
void *(*dev2host_func) (int, void *, const void *, size_t);
Index: libgomp/plugin/plugin-nvptx.c
===================================================================
--- libgomp/plugin/plugin-nvptx.c (revision 226039)
+++ libgomp/plugin/plugin-nvptx.c (working copy)
@@ -36,6 +36,7 @@
#include "libgomp-plugin.h"
#include "oacc-ptx.h"
#include "oacc-plugin.h"
+#include "gomp-constants.h"
#include <pthread.h>
#include <cuda.h>
@@ -1644,12 +1645,22 @@ typedef struct nvptx_tdata
size_t fn_num;
} nvptx_tdata_t;
+/* Return the libgomp version number we're compatible with. There is
+ no requirement for cross-version compatibility. */
+
+unsigned
+GOMP_OFFLOAD_version (void)
+{
+ return GOMP_VERSION;
+}
+
/* Load the (partial) program described by TARGET_DATA to device
number ORD. Allocate and return TARGET_TABLE. */
int
-GOMP_OFFLOAD_load_image (int ord, const void *target_data,
- struct addr_pair **target_table)
+GOMP_OFFLOAD_load_image_ver (unsigned version, int ord,
+ const void *target_data,
+ struct addr_pair **target_table)
{
CUmodule module;
const char *const *fn_names, *const *var_names;
@@ -1661,6 +1672,11 @@ GOMP_OFFLOAD_load_image (int ord, const
struct ptx_image_data *new_image;
struct ptx_device *dev;
+ if (GOMP_VERSION_DEV (version) != GOMP_VERSION_NVIDIA_PTX)
+ GOMP_PLUGIN_fatal ("Offload data incompatible with PTX plugin"
+ " (version %u != %u)",
+ GOMP_VERSION_NVIDIA_PTX, GOMP_VERSION_DEV (version));
+
GOMP_OFFLOAD_init_device (ord);
dev = ptx_devices[ord];
@@ -1730,11 +1746,15 @@ GOMP_OFFLOAD_load_image (int ord, const
function descriptors allocated by G_O_load_image. */
void
-GOMP_OFFLOAD_unload_image (int ord, const void *target_data)
+GOMP_OFFLOAD_unload_image_ver (unsigned version, int ord,
+ const void *target_data)
{
struct ptx_image_data *image, **prev_p;
struct ptx_device *dev = ptx_devices[ord];
+ if (GOMP_VERSION_DEV (version) != GOMP_VERSION_NVIDIA_PTX)
+ return;
+
pthread_mutex_lock (&dev->image_lock);
for (prev_p = &dev->images; (image = *prev_p) != 0; prev_p = &image->next)
if (image->target_data == target_data)
Index: libgomp/oacc-host.c
===================================================================
--- libgomp/oacc-host.c (revision 226039)
+++ libgomp/oacc-host.c (working copy)
@@ -45,8 +45,9 @@ static struct gomp_device_descr host_dis
.get_num_devices_func = GOMP_OFFLOAD_get_num_devices,
.init_device_func = GOMP_OFFLOAD_init_device,
.fini_device_func = GOMP_OFFLOAD_fini_device,
- .load_image_func = GOMP_OFFLOAD_load_image,
- .unload_image_func = GOMP_OFFLOAD_unload_image,
+ .version_func = NULL,
+ .load_image = {.unver_func = GOMP_OFFLOAD_load_image},
+ .unload_image = {.unver_func = GOMP_OFFLOAD_unload_image},
.alloc_func = GOMP_OFFLOAD_alloc,
.free_func = GOMP_OFFLOAD_free,
.dev2host_func = GOMP_OFFLOAD_dev2host,