On Mon, Jul 13, 2015 at 18:50:29 +0300, Ilya Verbin wrote:
> On Mon, Jul 13, 2015 at 17:26:43 +0200, Jakub Jelinek wrote:
> > > > > > +  /* FIXME: Support device-to-device somehow?  */
> > > > > 
> > > > > Should libgomp copy data device-host-device if device-device is not 
> > > > > supported by
> > > > > target?  Current liboffloadmic doesn't support this.  I'll find out 
> > > > > if there are
> > > > > any plans.
> > > > 
> > > > There is also the option to spawn an offloaded function that will just 
> > > > call
> > > > memcpy, or have such a function next to the main () of the program that 
> > > > we link
> > > > in.
> > > 
> > > Do you mean the case when src_devicep == dst_devicep ?  It's easy to 
> > > support
> > > this by adding new func into plugin, whithout any changes in 
> > > liboffloadmic.
> > > I thought about memcpy between different devices...
> > 
> > Well, even src_devicep == dst_devicep does not guarantee it is the same
> > device, that is the case only if also src_devicep->target_id ==
> > dst_devicep->target_id, right?
> 
> Why?  Devices of one type with different target_id's have different entries in
> devices[].
> 
> > I wouldn't worry about that and just return EINVAL when copying in between
> > different devices.
> 
> I'll prepare a patch, which will add an interface for copying within one 
> device,
> covered by GOMP_OFFLOAD_CAP_OPENMP_400.

Here it is.  make check-target-libgomp passed.


libgomp/
        * libgomp.h (struct gomp_device_descr): Add dev2dev_func.
        * target.c (omp_target_memcpy): Support device-to-device.
        (omp_target_memcpy_rect_worker): Likewise.
        (omp_target_memcpy_rect): Likewise.
        (gomp_load_plugin_for_device): Check for GOMP_OFFLOAD_dev2dev.
        * testsuite/libgomp.c/target-12.c (main): Extend for testing
        device-to-device memcpy.
liboffloadmic/
        * plugin/libgomp-plugin-intelmic.cpp (GOMP_OFFLOAD_dev2dev): New
        function.
        * plugin/offload_target_main.cpp (__offload_target_tgt2tgt): New static
        function, register it in liboffloadmic.


diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h
index 8ed1abd..a64b98c 100644
--- a/libgomp/libgomp.h
+++ b/libgomp/libgomp.h
@@ -768,6 +768,7 @@ struct gomp_device_descr
   void (*free_func) (int, void *);
   void *(*dev2host_func) (int, void *, const void *, size_t);
   void *(*host2dev_func) (int, void *, const void *, size_t);
+  void *(*dev2dev_func) (int, void *, const void *, size_t);
   void (*run_func) (int, void *, void *);
 
   /* Splay tree containing information about mapped memory regions.  */
diff --git a/libgomp/target.c b/libgomp/target.c
index 024a9c8..2bfc019 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -1329,7 +1329,15 @@ omp_target_memcpy (void *dst, void *src, size_t length, 
size_t dst_offset,
       gomp_mutex_unlock (&src_devicep->lock);
       return 0;
     }
-  /* FIXME: Support device-to-device somehow?  */
+  if (src_devicep == dst_devicep)
+    {
+      gomp_mutex_lock (&src_devicep->lock);
+      src_devicep->dev2dev_func (src_devicep->target_id,
+                                (char *) dst + dst_offset,
+                                (char *) src + src_offset, length);
+      gomp_mutex_unlock (&src_devicep->lock);
+      return 0;
+    }
   return EINVAL;
 }
 
@@ -1364,6 +1372,10 @@ omp_target_memcpy_rect_worker (void *dst, void *src, 
size_t element_size,
        src_devicep->dev2host_func (src_devicep->target_id,
                                    (char *) dst + dst_off,
                                    (char *) src + src_off, length);
+      else if (src_devicep == dst_devicep)
+       src_devicep->dev2dev_func (src_devicep->target_id,
+                                  (char *) dst + dst_off,
+                                  (char *) src + src_off, length);
       else
        return EINVAL;
       return 0;
@@ -1437,10 +1449,6 @@ omp_target_memcpy_rect (void *dst, void *src, size_t 
element_size,
        src_devicep = NULL;
     }
 
-  /* FIXME: Support device-to-device somehow?  */
-  if (src_devicep != NULL && dst_devicep != NULL)
-    return EINVAL;
-
   if (src_devicep)
     gomp_mutex_lock (&src_devicep->lock);
   else if (dst_devicep)
@@ -1601,10 +1609,10 @@ gomp_load_plugin_for_device (struct gomp_device_descr 
*device,
     }                                                                  \
   while (0)
   /* Similar, but missing functions are not an error.  */
-#define DLSYM_OPT(f, n)                                                \
+#define DLSYM_OPT(f, n)                                                        
\
   do                                                                   \
     {                                                                  \
-      const char *tmp_err;                                                     
\
+      const char *tmp_err;                                             \
       device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n);    \
       tmp_err = dlerror ();                                            \
       if (tmp_err == NULL)                                             \
@@ -1629,7 +1637,10 @@ gomp_load_plugin_for_device (struct gomp_device_descr 
*device,
   DLSYM (host2dev);
   device->capabilities = device->get_caps_func ();
   if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
-    DLSYM (run);
+    {
+      DLSYM (run);
+      DLSYM (dev2dev);
+    }
   if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
     {
       optional_present = optional_total = 0;
diff --git a/libgomp/testsuite/libgomp.c/target-12.c 
b/libgomp/testsuite/libgomp.c/target-12.c
index 622c583..0d8232e 100644
--- a/libgomp/testsuite/libgomp.c/target-12.c
+++ b/libgomp/testsuite/libgomp.c/target-12.c
@@ -105,6 +105,22 @@ main ()
       if (err)
        abort ();
 
+      if (omp_target_memcpy (p, p, 10 * sizeof (int), 51 * sizeof (int),
+                            111 * sizeof (int), d, d) != 0)
+       abort ();
+
+      #pragma omp target if (d >= 0) device (d >= 0 ? d : 0) 
map(alloc:q[0:32]) map(from:err)
+       {
+         int j;
+         err = 0;
+         for (j = 0; j < 10; j++)
+           if (q[50 + j] != q[110 + j])
+             err = 1;
+       }
+
+      if (err)
+       abort ();
+
       if (omp_target_disassociate_ptr (q, d) != 0)
        abort ();
     }
diff --git a/liboffloadmic/plugin/libgomp-plugin-intelmic.cpp 
b/liboffloadmic/plugin/libgomp-plugin-intelmic.cpp
index a2d61b1..25de3b4 100644
--- a/liboffloadmic/plugin/libgomp-plugin-intelmic.cpp
+++ b/liboffloadmic/plugin/libgomp-plugin-intelmic.cpp
@@ -451,6 +451,29 @@ GOMP_OFFLOAD_dev2host (int device, void *host_ptr, const 
void *tgt_ptr,
   return host_ptr;
 }
 
+extern "C" void *
+GOMP_OFFLOAD_dev2dev (int device, void *dst_ptr, const void *src_ptr,
+                     size_t size)
+{
+  TRACE ("(dst_ptr = %p, src_ptr = %p, size = %d)", dst_ptr, src_ptr, size);
+  if (!size)
+    return dst_ptr;
+
+  VarDesc vd1[3] = { vd_host2tgt, vd_host2tgt, vd_host2tgt };
+  vd1[0].ptr = &dst_ptr;
+  vd1[0].size = sizeof (void *);
+  vd1[1].ptr = &src_ptr;
+  vd1[1].size = sizeof (void *);
+  vd1[2].ptr = &size;
+  vd1[2].size = sizeof (size);
+  VarDesc2 vd1g[3] = { { "dst_ptr", 0 }, { "src_ptr", 0 }, { "size", 0 } };
+
+  offload (__FILE__, __LINE__, device, "__offload_target_tgt2tgt", 3, vd1,
+          vd1g);
+
+  return dst_ptr;
+}
+
 extern "C" void
 GOMP_OFFLOAD_run (int device, void *tgt_fn, void *tgt_vars)
 {
diff --git a/liboffloadmic/plugin/offload_target_main.cpp 
b/liboffloadmic/plugin/offload_target_main.cpp
index 3fead01..18b0146 100644
--- a/liboffloadmic/plugin/offload_target_main.cpp
+++ b/liboffloadmic/plugin/offload_target_main.cpp
@@ -299,6 +299,29 @@ __offload_target_tgt2host_p2 (OFFLOAD ofldt)
   __offload_target_leave (ofldt);
 }
 
+/* Copy SIZE bytes from SRC_PTR to DST_PTR.  */
+static void
+__offload_target_tgt2tgt (OFFLOAD ofldt)
+{
+  void *src_ptr = NULL;
+  void *dst_ptr = NULL;
+  size_t size = 0;
+
+  VarDesc vd1[3] = { vd_host2tgt, vd_host2tgt, vd_host2tgt };
+  vd1[0].ptr = &dst_ptr;
+  vd1[0].size = sizeof (void *);
+  vd1[1].ptr = &src_ptr;
+  vd1[1].size = sizeof (void *);
+  vd1[2].ptr = &size;
+  vd1[2].size = sizeof (size);
+  VarDesc2 vd1g[3] = { { "dst_ptr", 0 }, { "src_ptr", 0 }, { "size", 0 } };
+
+  __offload_target_enter (ofldt, 3, vd1, vd1g);
+  TRACE ("(dst_ptr = %p, src_ptr = %p, size = %d)", dst_ptr, src_ptr, size);
+  memcpy (dst_ptr, src_ptr, size);
+  __offload_target_leave (ofldt);
+}
+
 /* Call offload function by the address fn_ptr and pass vars_ptr to it.  */
 static void
 __offload_target_run (OFFLOAD ofldt)
@@ -363,5 +386,6 @@ REGISTER (host2tgt_p1);
 REGISTER (host2tgt_p2);
 REGISTER (tgt2host_p1);
 REGISTER (tgt2host_p2);
+REGISTER (tgt2tgt);
 REGISTER (run);
 #undef REGISTER


  -- Ilya

Reply via email to