Hi Chung-Lin!
On Sat, 5 Jan 2019 17:47:10 +0800, Chung-Lin Tang <[email protected]>
wrote:
> this is the current version of the oacc-* parts of the Async Re-work patch.
>
> I have reverted away from the earlier mentioned attempt of using lockless
> techniques to manage the asyncqueues; it is really hard to do in a 100%
> correct
> manner, unless we only use something like simple lists to manage them,
> which probably makes lookup unacceptably slow.
>
> For now, I have changed to use the conventional locking and success/fail
> return
> codes for the synchronize/serialize hooks.
OK, thanks.
> I hope this is enough to pass
> and get committed.
Well, the "Properly handle wait clause with no arguments" changes still
need to be completed and go in first (to avoid introducing regressions),
and then I will have to see your whole set of changes that you intend to
commit: the bits you've incrementally posted still don't include several
of the changes I suggested and provided patches for (again, to avoid
introducing regressions).
But GCC is now in "regression and documentation fixes mode", so I fear
that it may be too late now.
> --- oacc-async.c (revision 267507)
> +++ oacc-async.c (working copy)
> @@ -62,12 +158,10 @@ acc_wait (int async)
> + goacc_aq aq = lookup_goacc_asyncqueue (thr, true, async);
> + thr->dev->openacc.async.synchronize_func (aq);
Doesn't the result have to be checked here?  Like you're doing in the following, for example:
> acc_wait_async (int async1, int async2)
> {
> + if (!thr->dev->openacc.async.synchronize_func (aq1))
> + gomp_fatal ("wait on %d failed", async1);
> + if (!thr->dev->openacc.async.serialize_func (aq1, aq2))
> + gomp_fatal ("ordering of async ids %d and %d failed", async1, async2);
> --- oacc-parallel.c (revision 267507)
> +++ oacc-parallel.c (working copy)
> @@ -521,17 +500,22 @@ goacc_wait (int async, int num_waits, va_list *ap)
> if (async == acc_async_sync)
> - acc_wait (qid);
> + acc_dev->openacc.async.synchronize_func (aq);
Likewise: doesn't the result of synchronize_func have to be checked here?
> else if (qid == async)
> - ;/* If we're waiting on the same asynchronous queue as we're
> - launching on, the queue itself will order work as
> - required, so there's no need to wait explicitly. */
> + /* If we're waiting on the same asynchronous queue as we're
> + launching on, the queue itself will order work as
> + required, so there's no need to wait explicitly. */
> + ;
> else
> - acc_dev->openacc.async_wait_async_func (qid, async);
> + {
> + goacc_aq aq2 = get_goacc_asyncqueue (async);
> + acc_dev->openacc.async.synchronize_func (aq);
> + acc_dev->openacc.async.serialize_func (aq, aq2);
> + }
Likewise: don't the results of synchronize_func and serialize_func have to be checked here?
Also, I had to apply the attached additional changes to make this build.
Grüße
Thomas
From e4c187a4be46682a989165c38bc6a8d8324554b9 Mon Sep 17 00:00:00 2001
From: Thomas Schwinge <[email protected]>
Date: Mon, 7 Jan 2019 13:25:18 +0100
Subject: [PATCH] [WIP] into async re-work: complete
GOMP_OFFLOAD_openacc_async_synchronize, GOMP_OFFLOAD_openacc_async_serialize
interface changes
---
libgomp/libgomp-plugin.h | 4 ++--
libgomp/plugin/plugin-nvptx.c | 29 +++++++++++++++++++++--------
2 files changed, 23 insertions(+), 10 deletions(-)
diff --git a/libgomp/libgomp-plugin.h b/libgomp/libgomp-plugin.h
index e3c031a282a1..ce3ae125e208 100644
--- a/libgomp/libgomp-plugin.h
+++ b/libgomp/libgomp-plugin.h
@@ -115,8 +115,8 @@ extern void GOMP_OFFLOAD_openacc_destroy_thread_data (void *);
extern struct goacc_asyncqueue *GOMP_OFFLOAD_openacc_async_construct (void);
extern bool GOMP_OFFLOAD_openacc_async_destruct (struct goacc_asyncqueue *);
extern int GOMP_OFFLOAD_openacc_async_test (struct goacc_asyncqueue *);
-extern void GOMP_OFFLOAD_openacc_async_synchronize (struct goacc_asyncqueue *);
-extern void GOMP_OFFLOAD_openacc_async_serialize (struct goacc_asyncqueue *,
+extern bool GOMP_OFFLOAD_openacc_async_synchronize (struct goacc_asyncqueue *);
+extern bool GOMP_OFFLOAD_openacc_async_serialize (struct goacc_asyncqueue *,
struct goacc_asyncqueue *);
extern void GOMP_OFFLOAD_openacc_async_queue_callback (struct goacc_asyncqueue *,
void (*)(void *), void *);
diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c
index f42cbf488a79..12f87ba7be4d 100644
--- a/libgomp/plugin/plugin-nvptx.c
+++ b/libgomp/plugin/plugin-nvptx.c
@@ -1395,22 +1395,35 @@ GOMP_OFFLOAD_openacc_async_test (struct goacc_asyncqueue *aq)
return -1;
}
-void
+bool
GOMP_OFFLOAD_openacc_async_synchronize (struct goacc_asyncqueue *aq)
{
- //TODO Is this safe to call, or might this cause deadlock if something's locked?
- CUDA_CALL_ASSERT (cuStreamSynchronize, aq->cuda_stream);
+ CUresult r = CUDA_CALL_NOCHECK (cuStreamSynchronize, aq->cuda_stream);
+ return r == CUDA_SUCCESS;
}
-void
+bool
GOMP_OFFLOAD_openacc_async_serialize (struct goacc_asyncqueue *aq1,
struct goacc_asyncqueue *aq2)
{
+ CUresult r;
CUevent e;
- //TODO Are these safe to call, or might this cause deadlock if something's locked?
- CUDA_CALL_ASSERT (cuEventCreate, &e, CU_EVENT_DISABLE_TIMING);
- CUDA_CALL_ASSERT (cuEventRecord, e, aq1->cuda_stream);
- CUDA_CALL_ASSERT (cuStreamWaitEvent, aq2->cuda_stream, e, 0);
+ r = CUDA_CALL_NOCHECK (cuEventCreate, &e, CU_EVENT_DISABLE_TIMING);
+ if (r != CUDA_SUCCESS)
+ return false;
+ r = CUDA_CALL_NOCHECK (cuEventRecord, e, aq1->cuda_stream);
+ if (r != CUDA_SUCCESS)
+ {
+ //TODO "cuEventDestroy"?
+ return false;
+ }
+ r = CUDA_CALL_NOCHECK (cuStreamWaitEvent, aq2->cuda_stream, e, 0);
+ if (r != CUDA_SUCCESS)
+ {
+ //TODO "cuEventDestroy"?
+ return false;
+ }
+ return true;
}
static void
--
2.17.1