On Mon, Jul 13, 2015 at 18:50:29 +0300, Ilya Verbin wrote: > On Mon, Jul 13, 2015 at 17:26:43 +0200, Jakub Jelinek wrote: > > > > > > + /* FIXME: Support device-to-device somehow? */ > > > > > > > > > > Should libgomp copy data device-host-device if device-device is not > > > > > supported by > > > > > target? Current liboffloadmic doesn't support this. I'll find out > > > > > if there are > > > > > any plans. > > > > > > > > There is also the option to spawn an offloaded function that will just > > > > call > > > > memcpy, or have such a function next to the main () of the program that > > > > we link > > > > in. > > > > > > Do you mean the case when src_devicep == dst_devicep ? It's easy to > > > support > > > this by adding new func into plugin, whithout any changes in > > > liboffloadmic. > > > I thought about memcpy between different devices... > > > > Well, even src_devicep == dst_devicep does not guarantee it is the same > > device, that is the case only if also src_devicep->target_id == > > dst_devicep->target_id, right? > > Why? Devices of one type with different target_id's have different entries in > devices[]. > > > I wouldn't worry about that and just return EINVAL when copying in between > > different devices. > > I'll prepare a patch, which will add an interface for copying within one > device, > covered by GOMP_OFFLOAD_CAP_OPENMP_400.
Here it is. make check-target-libgomp passed. libgomp/ * libgomp.h (struct gomp_device_descr): Add dev2dev_func. * target.c (omp_target_memcpy): Support device-to-device. (omp_target_memcpy_rect_worker): Likewise. (omp_target_memcpy_rect): Likewise. (gomp_load_plugin_for_device): Check for GOMP_OFFLOAD_dev2dev. * testsuite/libgomp.c/target-12.c (main): Extend for testing device-to-device memcpy. liboffloadmic/ * plugin/libgomp-plugin-intelmic.cpp (GOMP_OFFLOAD_dev2dev): New function. * plugin/offload_target_main.cpp (__offload_target_tgt2tgt): New static function, register it in liboffloadmic. diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h index 8ed1abd..a64b98c 100644 --- a/libgomp/libgomp.h +++ b/libgomp/libgomp.h @@ -768,6 +768,7 @@ struct gomp_device_descr void (*free_func) (int, void *); void *(*dev2host_func) (int, void *, const void *, size_t); void *(*host2dev_func) (int, void *, const void *, size_t); + void *(*dev2dev_func) (int, void *, const void *, size_t); void (*run_func) (int, void *, void *); /* Splay tree containing information about mapped memory regions. */ diff --git a/libgomp/target.c b/libgomp/target.c index 024a9c8..2bfc019 100644 --- a/libgomp/target.c +++ b/libgomp/target.c @@ -1329,7 +1329,15 @@ omp_target_memcpy (void *dst, void *src, size_t length, size_t dst_offset, gomp_mutex_unlock (&src_devicep->lock); return 0; } - /* FIXME: Support device-to-device somehow? */ + if (src_devicep == dst_devicep) + { + gomp_mutex_lock (&src_devicep->lock); + src_devicep->dev2dev_func (src_devicep->target_id, + (char *) dst + dst_offset, + (char *) src + src_offset, length); + gomp_mutex_unlock (&src_devicep->lock); + return 0; + } return EINVAL; } @@ -1364,6 +1372,10 @@ omp_target_memcpy_rect_worker (void *dst, void *src, size_t element_size, src_devicep->dev2host_func (src_devicep->target_id, (char *) dst + dst_off, (char *) src + src_off, length); + else if (src_devicep == dst_devicep) + src_devicep->dev2dev_func (src_devicep->target_id, + (char *) dst + dst_off, + (char *) src + src_off, length); else return EINVAL; return 0; @@ -1437,10 +1449,6 @@ omp_target_memcpy_rect (void *dst, void *src, size_t element_size, src_devicep = NULL; } - /* FIXME: Support device-to-device somehow? */ - if (src_devicep != NULL && dst_devicep != NULL) - return EINVAL; - if (src_devicep) gomp_mutex_lock (&src_devicep->lock); else if (dst_devicep) @@ -1601,10 +1609,10 @@ gomp_load_plugin_for_device (struct gomp_device_descr *device, } \ while (0) /* Similar, but missing functions are not an error. */ -#define DLSYM_OPT(f, n) \ +#define DLSYM_OPT(f, n) \ do \ { \ - const char *tmp_err; \ + const char *tmp_err; \ device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n); \ tmp_err = dlerror (); \ if (tmp_err == NULL) \ @@ -1629,7 +1637,10 @@ gomp_load_plugin_for_device (struct gomp_device_descr *device, DLSYM (host2dev); device->capabilities = device->get_caps_func (); if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) - DLSYM (run); + { + DLSYM (run); + DLSYM (dev2dev); + } if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200) { optional_present = optional_total = 0; diff --git a/libgomp/testsuite/libgomp.c/target-12.c b/libgomp/testsuite/libgomp.c/target-12.c index 622c583..0d8232e 100644 --- a/libgomp/testsuite/libgomp.c/target-12.c +++ b/libgomp/testsuite/libgomp.c/target-12.c @@ -105,6 +105,22 @@ main () if (err) abort (); + if (omp_target_memcpy (p, p, 10 * sizeof (int), 51 * sizeof (int), + 111 * sizeof (int), d, d) != 0) + abort (); + + #pragma omp target if (d >= 0) device (d >= 0 ? d : 0) map(alloc:q[0:32]) map(from:err) + { + int j; + err = 0; + for (j = 0; j < 10; j++) + if (q[50 + j] != q[110 + j]) + err = 1; + } + + if (err) + abort (); + if (omp_target_disassociate_ptr (q, d) != 0) abort (); } diff --git a/liboffloadmic/plugin/libgomp-plugin-intelmic.cpp b/liboffloadmic/plugin/libgomp-plugin-intelmic.cpp index a2d61b1..25de3b4 100644 --- a/liboffloadmic/plugin/libgomp-plugin-intelmic.cpp +++ b/liboffloadmic/plugin/libgomp-plugin-intelmic.cpp @@ -451,6 +451,29 @@ GOMP_OFFLOAD_dev2host (int device, void *host_ptr, const void *tgt_ptr, return host_ptr; } +extern "C" void * +GOMP_OFFLOAD_dev2dev (int device, void *dst_ptr, const void *src_ptr, + size_t size) +{ + TRACE ("(dst_ptr = %p, src_ptr = %p, size = %d)", dst_ptr, src_ptr, size); + if (!size) + return dst_ptr; + + VarDesc vd1[3] = { vd_host2tgt, vd_host2tgt, vd_host2tgt }; + vd1[0].ptr = &dst_ptr; + vd1[0].size = sizeof (void *); + vd1[1].ptr = &src_ptr; + vd1[1].size = sizeof (void *); + vd1[2].ptr = &size; + vd1[2].size = sizeof (size); + VarDesc2 vd1g[3] = { { "dst_ptr", 0 }, { "src_ptr", 0 }, { "size", 0 } }; + + offload (__FILE__, __LINE__, device, "__offload_target_tgt2tgt", 3, vd1, + vd1g); + + return dst_ptr; +} + extern "C" void GOMP_OFFLOAD_run (int device, void *tgt_fn, void *tgt_vars) { diff --git a/liboffloadmic/plugin/offload_target_main.cpp b/liboffloadmic/plugin/offload_target_main.cpp index 3fead01..18b0146 100644 --- a/liboffloadmic/plugin/offload_target_main.cpp +++ b/liboffloadmic/plugin/offload_target_main.cpp @@ -299,6 +299,29 @@ __offload_target_tgt2host_p2 (OFFLOAD ofldt) __offload_target_leave (ofldt); } +/* Copy SIZE bytes from SRC_PTR to DST_PTR. */ +static void +__offload_target_tgt2tgt (OFFLOAD ofldt) +{ + void *src_ptr = NULL; + void *dst_ptr = NULL; + size_t size = 0; + + VarDesc vd1[3] = { vd_host2tgt, vd_host2tgt, vd_host2tgt }; + vd1[0].ptr = &dst_ptr; + vd1[0].size = sizeof (void *); + vd1[1].ptr = &src_ptr; + vd1[1].size = sizeof (void *); + vd1[2].ptr = &size; + vd1[2].size = sizeof (size); + VarDesc2 vd1g[3] = { { "dst_ptr", 0 }, { "src_ptr", 0 }, { "size", 0 } }; + + __offload_target_enter (ofldt, 3, vd1, vd1g); + TRACE ("(dst_ptr = %p, src_ptr = %p, size = %d)", dst_ptr, src_ptr, size); + memcpy (dst_ptr, src_ptr, size); + __offload_target_leave (ofldt); +} + /* Call offload function by the address fn_ptr and pass vars_ptr to it. */ static void __offload_target_run (OFFLOAD ofldt) @@ -363,5 +386,6 @@ REGISTER (host2tgt_p1); REGISTER (host2tgt_p2); REGISTER (tgt2host_p1); REGISTER (tgt2host_p2); +REGISTER (tgt2tgt); REGISTER (run); #undef REGISTER -- Ilya