Hi Chung-Lin!

On Sat, 5 Jan 2019 17:47:10 +0800, Chung-Lin Tang <chunglin_t...@mentor.com> wrote:
> this is the current version of the oacc-* parts of the Async Re-work patch.
> 
> I have reverted away from the earlier mentioned attempt of using lockless
> techniques to manage the asyncqueues; it is really hard to do in a 100%
> correct manner, unless we only use something like simple lists to manage
> them, which probably makes lookup unacceptably slow.
> 
> For now, I have changed to use the conventional locking and success/fail
> return codes for the synchronize/serialize hooks.

OK, thanks.


> I hope this is enough to pass
> and get committed.

Well, the "Properly handle wait clause with no arguments" changes still
need to be completed and go in first (to avoid introducing regressions),
and then I will have to see your whole set of changes that you intend to
commit: the bits you've incrementally posted still don't include several
of the changes I suggested and provided patches for (again, to avoid
introducing regressions).


But GCC is now in "regression and documentation fixes mode", so I fear
that it may be too late?


> --- oacc-async.c      (revision 267507)
> +++ oacc-async.c      (working copy)

> @@ -62,12 +158,10 @@ acc_wait (int async)

> +  goacc_aq aq = lookup_goacc_asyncqueue (thr, true, async);
> +  thr->dev->openacc.async.synchronize_func (aq);

Don't we have to check the result here?  Like you're doing here, for example:

>  acc_wait_async (int async1, int async2)
>  {

> +  if (!thr->dev->openacc.async.synchronize_func (aq1))
> +    gomp_fatal ("wait on %d failed", async1);
> +  if (!thr->dev->openacc.async.serialize_func (aq1, aq2))
> +    gomp_fatal ("ordering of async ids %d and %d failed", async1, async2);

> --- oacc-parallel.c   (revision 267507)
> +++ oacc-parallel.c   (working copy)

> @@ -521,17 +500,22 @@ goacc_wait (int async, int num_waits, va_list *ap)

>        if (async == acc_async_sync)
> -     acc_wait (qid);
> +     acc_dev->openacc.async.synchronize_func (aq);

Likewise?

>        else if (qid == async)
> -     ;/* If we're waiting on the same asynchronous queue as we're
> -         launching on, the queue itself will order work as
> -         required, so there's no need to wait explicitly.  */
> +     /* If we're waiting on the same asynchronous queue as we're
> +        launching on, the queue itself will order work as
> +        required, so there's no need to wait explicitly.  */
> +     ;
>        else
> -     acc_dev->openacc.async_wait_async_func (qid, async);
> +     {
> +       goacc_aq aq2 = get_goacc_asyncqueue (async);
> +       acc_dev->openacc.async.synchronize_func (aq);
> +       acc_dev->openacc.async.serialize_func (aq, aq2);
> +     }

Likewise?


Also, I had to apply additional changes as attached, to make this build.


Grüße
 Thomas


From e4c187a4be46682a989165c38bc6a8d8324554b9 Mon Sep 17 00:00:00 2001
From: Thomas Schwinge <tho...@codesourcery.com>
Date: Mon, 7 Jan 2019 13:25:18 +0100
Subject: [PATCH] [WIP] into async re-work: complete
 GOMP_OFFLOAD_openacc_async_synchronize, GOMP_OFFLOAD_openacc_async_serialize
 interface changes

---
 libgomp/libgomp-plugin.h      |  4 ++--
 libgomp/plugin/plugin-nvptx.c | 29 +++++++++++++++++++++--------
 2 files changed, 23 insertions(+), 10 deletions(-)

diff --git a/libgomp/libgomp-plugin.h b/libgomp/libgomp-plugin.h
index e3c031a282a1..ce3ae125e208 100644
--- a/libgomp/libgomp-plugin.h
+++ b/libgomp/libgomp-plugin.h
@@ -115,8 +115,8 @@ extern void GOMP_OFFLOAD_openacc_destroy_thread_data (void *);
 extern struct goacc_asyncqueue *GOMP_OFFLOAD_openacc_async_construct (void);
 extern bool GOMP_OFFLOAD_openacc_async_destruct (struct goacc_asyncqueue *);
 extern int GOMP_OFFLOAD_openacc_async_test (struct goacc_asyncqueue *);
-extern void GOMP_OFFLOAD_openacc_async_synchronize (struct goacc_asyncqueue *);
-extern void GOMP_OFFLOAD_openacc_async_serialize (struct goacc_asyncqueue *,
+extern bool GOMP_OFFLOAD_openacc_async_synchronize (struct goacc_asyncqueue *);
+extern bool GOMP_OFFLOAD_openacc_async_serialize (struct goacc_asyncqueue *,
 						  struct goacc_asyncqueue *);
 extern void GOMP_OFFLOAD_openacc_async_queue_callback (struct goacc_asyncqueue *,
 						       void (*)(void *), void *);
diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c
index f42cbf488a79..12f87ba7be4d 100644
--- a/libgomp/plugin/plugin-nvptx.c
+++ b/libgomp/plugin/plugin-nvptx.c
@@ -1395,22 +1395,35 @@ GOMP_OFFLOAD_openacc_async_test (struct goacc_asyncqueue *aq)
   return -1;
 }
 
-void
+bool
 GOMP_OFFLOAD_openacc_async_synchronize (struct goacc_asyncqueue *aq)
 {
-  //TODO Is this safe to call, or might this cause deadlock if something's locked?
-  CUDA_CALL_ASSERT (cuStreamSynchronize, aq->cuda_stream);
+  CUresult r = CUDA_CALL_NOCHECK (cuStreamSynchronize, aq->cuda_stream);
+  return r == CUDA_SUCCESS;
 }
 
-void
+bool
 GOMP_OFFLOAD_openacc_async_serialize (struct goacc_asyncqueue *aq1,
 				      struct goacc_asyncqueue *aq2)
 {
+  CUresult r;
   CUevent e;
-  //TODO Are these safe to call, or might this cause deadlock if something's locked?
-  CUDA_CALL_ASSERT (cuEventCreate, &e, CU_EVENT_DISABLE_TIMING);
-  CUDA_CALL_ASSERT (cuEventRecord, e, aq1->cuda_stream);
-  CUDA_CALL_ASSERT (cuStreamWaitEvent, aq2->cuda_stream, e, 0);
+  r = CUDA_CALL_NOCHECK (cuEventCreate, &e, CU_EVENT_DISABLE_TIMING);
+  if (r != CUDA_SUCCESS)
+    return false;
+  r = CUDA_CALL_NOCHECK (cuEventRecord, e, aq1->cuda_stream);
+  if (r != CUDA_SUCCESS)
+    {
+      //TODO "cuEventDestroy"?
+      return false;
+    }
+  r = CUDA_CALL_NOCHECK (cuStreamWaitEvent, aq2->cuda_stream, e, 0);
+  if (r != CUDA_SUCCESS)
+    {
+      //TODO "cuEventDestroy"?
+      return false;
+    }
+  return true;
 }
 
 static void
-- 
2.17.1

Reply via email to