Hi! On 2021-06-30T10:28:00+0200, I wrote: > On 2021-06-29T16:42:00-0700, Julian Brown <jul...@codesourcery.com> wrote: >> - The OpenACC profiling-interface implementation did not measure >> asynchronous operations properly. > > We'll need to be careful: (possibly, an older version of) that one we > internally had identified to be causing some issues; see the > "acc_prof-parallel-1.c intermittent failure on og10 branch" emails, > 2020-07.
That's still unresolved (not blaming you!); those intermittent failures are still seen. I've not yet been able to look into your follow-on discussion and WIP patch 'acc_prof-parallel-barrier-1.diff' "Add barrier, hack" in detail. As part of the og12 branch setup, Kwok then had to put og12 commit b845d2f62e7da1c4cfdfee99690de94b648d076d "Revert changes to acc_prof-init-1.c and acc_prof-parallel-1.c" on top of your og12 commit 719f93c8618a134f90b5b661ab70c918d659ad05 "OpenACC profiling-interface fixes for asynchronous operations", and that stuff is now again conflicting with GCC master branch work that I need to cherry-pick into og12 branch. Therefore, I'm now reverting this on og12 branch -- with the intention to resolve that issue on master branch, eventually (but no promises, when). Pushed to devel/omp/gcc-12 branch commit 1818bab2ce9f11d8dde5b378f580971b87a5c4ff 'Revert "Revert changes to acc_prof-init-1.c and acc_prof-parallel-1.c"', and commit b8beaa8447ed3c1637e8f93a08c0e47b5709290f 'Revert "OpenACC profiling-interface fixes for asynchronous operations"', see attached. Grüße Thomas ----------------- Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955
>From 1818bab2ce9f11d8dde5b378f580971b87a5c4ff Mon Sep 17 00:00:00 2001 From: Thomas Schwinge <tho...@codesourcery.com> Date: Thu, 2 Mar 2023 11:24:28 +0100 Subject: [PATCH 1/2] Revert "Revert changes to acc_prof-init-1.c and acc_prof-parallel-1.c" ... as a prerequisite for reverting "OpenACC profiling-interface fixes for asynchronous operations". This reverts og12 commit b845d2f62e7da1c4cfdfee99690de94b648d076d. libgomp/ * testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c: Revert "Revert changes to acc_prof-init-1.c and acc_prof-parallel-1.c" changes. * testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c: Likewise. --- libgomp/ChangeLog.omp | 8 ++++++++ .../acc_prof-init-1.c | 17 ++++++++++++++++ .../acc_prof-parallel-1.c | 20 +++++++++++++++++++ 3 files changed, 45 insertions(+) diff --git a/libgomp/ChangeLog.omp b/libgomp/ChangeLog.omp index 3ed90bb38f2..d55b0503920 100644 --- a/libgomp/ChangeLog.omp +++ b/libgomp/ChangeLog.omp @@ -1,3 +1,11 @@ +2023-03-10 Thomas Schwinge <tho...@codesourcery.com> + + * testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c: Revert + "Revert changes to acc_prof-init-1.c and acc_prof-parallel-1.c" + changes. + * testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c: + Likewise. + 2023-03-01 Tobias Burnus <tob...@codesourcery.com> Backported from master: diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c index 6bbe99df1ff..a33fac7556c 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c @@ -208,6 +208,21 @@ static void cb_compute_construct_end (acc_prof_info *prof_info, acc_event_info * assert (state == 11 || state == 111); +#if defined COPYIN + /* In an 'async' setting, this event may be triggered before actual 'async' + data copying has completed. Given that 'state' appears in 'COPYIN', we + first have to synchronize (that is, let the 'async' 'COPYIN' read the + current 'state' value)... */ + if (acc_async != acc_async_sync) + { + /* "We're not yet accounting for the fact that _OpenACC events may occur + during event processing_"; temporarily disable to avoid deadlock. */ + unreg (acc_ev_none, NULL, acc_toggle_per_thread); + acc_wait (acc_async); + reg (acc_ev_none, NULL, acc_toggle_per_thread); + } + /* ... before modifying it in the following. */ +#endif STATE_OP (state, ++); assert (tool_info != NULL); @@ -280,6 +295,7 @@ int main() { state_init = state; } + acc_async = acc_async_sync; #pragma acc wait assert (state_init == 11); } @@ -306,6 +322,7 @@ int main() { state_init = state; } + acc_async = acc_async_sync; #pragma acc wait assert (state_init == 111); } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c index 9a542b56fe5..663f7f724d5 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c @@ -248,6 +248,25 @@ static void cb_enter_data_end (acc_prof_info *prof_info, acc_event_info *event_i assert (state == 4 || state == 104); +#if defined COPYIN + /* Conceptually, 'acc_ev_enter_data_end' marks the end of data copying, + before 'acc_ev_enqueue_launch_start' marks invoking the compute region. + That's the 'state_init = state;' intended to be captured in the compute + regions. */ + /* In an 'async' setting, this event may be triggered before actual 'async' + data copying has completed. Given that 'state' appears in 'COPYIN', we + first have to synchronize (that is, let the 'async' 'COPYIN' read the + current 'state' value)... */ + if (acc_async != acc_async_sync) + { + /* "We're not yet accounting for the fact that _OpenACC events may occur + during event processing_"; temporarily disable to avoid deadlock. */ + unreg (acc_ev_none, NULL, acc_toggle_per_thread); + acc_wait (acc_async); + reg (acc_ev_none, NULL, acc_toggle_per_thread); + } + /* ... before modifying it in the following. */ +#endif STATE_OP (state, ++); assert (tool_info != NULL); @@ -679,6 +698,7 @@ int main() state_init = state; } + acc_async = acc_async_sync; #pragma acc wait assert (state_init == 104); } -- 2.25.1
>From b8beaa8447ed3c1637e8f93a08c0e47b5709290f Mon Sep 17 00:00:00 2001 From: Thomas Schwinge <tho...@codesourcery.com> Date: Thu, 2 Mar 2023 11:28:24 +0100 Subject: [PATCH 2/2] Revert "OpenACC profiling-interface fixes for asynchronous operations" There is occasional execution failure; these changes need to be reviewed. This reverts og12 commit 719f93c8618a134f90b5b661ab70c918d659ad05. libgomp/ * oacc-host.c: Revert "OpenACC profiling-interface fixes for asynchronous operations" changes. * oacc-mem.c: Likewise. * oacc-parallel.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c: Likewise. --- libgomp/ChangeLog.omp | 9 + libgomp/oacc-host.c | 5 +- libgomp/oacc-mem.c | 32 +-- libgomp/oacc-parallel.c | 192 ++++-------------- .../acc_prof-init-1.c | 5 +- .../acc_prof-parallel-1.c | 64 ++++-- 6 files changed, 113 insertions(+), 194 deletions(-) diff --git a/libgomp/ChangeLog.omp b/libgomp/ChangeLog.omp index d55b0503920..0e984754bb0 100644 --- a/libgomp/ChangeLog.omp +++ b/libgomp/ChangeLog.omp @@ -1,5 +1,14 @@ 2023-03-10 Thomas Schwinge <tho...@codesourcery.com> + * oacc-host.c: Revert + "OpenACC profiling-interface fixes for asynchronous operations" + changes. + * oacc-mem.c: Likewise. + * oacc-parallel.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c: + Likewise. + * testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c: Revert "Revert changes to acc_prof-init-1.c and acc_prof-parallel-1.c" changes. diff --git a/libgomp/oacc-host.c b/libgomp/oacc-host.c index afc99c5a374..94792abe5ce 100644 --- a/libgomp/oacc-host.c +++ b/libgomp/oacc-host.c @@ -215,9 +215,10 @@ host_openacc_async_dev2host (int ord __attribute__ ((unused)), static void host_openacc_async_queue_callback (struct goacc_asyncqueue *aq __attribute__ ((unused)), - void (*callback_fn)(void *), void *userptr) + void (*callback_fn)(void *) + __attribute__ ((unused)), + void *userptr __attribute__ ((unused))) { - callback_fn (userptr); } static struct goacc_asyncqueue * diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c index 4b7d306f402..6fb8be98542 100644 --- a/libgomp/oacc-mem.c +++ b/libgomp/oacc-mem.c @@ -1431,12 +1431,6 @@ goacc_exit_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum, gomp_mutex_unlock (&acc_dev->lock); } -struct async_prof_callback_info * -queue_async_prof_dispatch (struct gomp_device_descr *devicep, goacc_aq aq, - acc_prof_info *prof_info, acc_event_info *event_info, - acc_api_info *api_info, - struct async_prof_callback_info *prev_info); - static void goacc_enter_exit_data_internal (int flags_m, size_t mapnum, void **hostaddrs, size_t *sizes, unsigned short *kinds, @@ -1447,7 +1441,6 @@ goacc_enter_exit_data_internal (int flags_m, size_t mapnum, void **hostaddrs, struct goacc_thread *thr; struct gomp_device_descr *acc_dev; - struct async_prof_callback_info *data_start_info = NULL; goacc_lazy_initialize (); @@ -1503,19 +1496,9 @@ goacc_enter_exit_data_internal (int flags_m, size_t mapnum, void **hostaddrs, api_info.async_handle = NULL; } - goacc_aq aq = get_goacc_asyncqueue (async); - if (profiling_p) - { - if (aq) - data_start_info - = queue_async_prof_dispatch (acc_dev, aq, &prof_info, - &enter_exit_data_event_info, &api_info, - NULL); - else - goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, - &api_info); - } + goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, + &api_info); if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) || (flags & GOACC_FLAG_HOST_FALLBACK)) @@ -1529,6 +1512,8 @@ goacc_enter_exit_data_internal (int flags_m, size_t mapnum, void **hostaddrs, if (num_waits) goacc_wait (async, num_waits, ap); + goacc_aq aq = get_goacc_asyncqueue (async); + if (data_enter) goacc_enter_data_internal (acc_dev, mapnum, hostaddrs, sizes, kinds, aq); else @@ -1540,13 +1525,8 @@ goacc_enter_exit_data_internal (int flags_m, size_t mapnum, void **hostaddrs, prof_info.event_type = data_enter ? acc_ev_enter_data_end : acc_ev_exit_data_end; enter_exit_data_event_info.other_event.event_type = prof_info.event_type; - if (aq) - queue_async_prof_dispatch (acc_dev, aq, &prof_info, - &enter_exit_data_event_info, &api_info, - data_start_info); - else - goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, - &api_info); + goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, + &api_info); thr->prof_info = NULL; thr->api_info = NULL; diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c index 81e8eba4225..d66bc882a5f 100644 --- a/libgomp/oacc-parallel.c +++ b/libgomp/oacc-parallel.c @@ -259,62 +259,6 @@ handle_ftn_pointers (size_t mapnum, void **hostaddrs, size_t *sizes, } -struct async_prof_callback_info { - acc_prof_info prof_info; - acc_event_info event_info; - acc_api_info api_info; - struct async_prof_callback_info *start_info; -}; - -static void -async_prof_dispatch (void *ptr) -{ - struct async_prof_callback_info *info - = (struct async_prof_callback_info *) ptr; - - if (info->start_info) - { - /* The TOOL_INFO must be preserved from a start event to the - corresponding end event. Copy that here. */ - void *tool_info = info->start_info->event_info.other_event.tool_info; - info->event_info.other_event.tool_info = tool_info; - } - - goacc_profiling_dispatch (&info->prof_info, &info->event_info, - &info->api_info); - - /* The async_prof_dispatch function is (so far) always used for start/end - profiling event pairs: the start and end parts are queued, then each is - dispatched (or the dispatches might be interleaved before the end part is - queued). - In any case, it's not safe to delete either info structure before the - whole bracketed event is complete. */ - - if (info->start_info) - { - free (info->start_info); - free (info); - } -} - -struct async_prof_callback_info * -queue_async_prof_dispatch (struct gomp_device_descr *devicep, goacc_aq aq, - acc_prof_info *prof_info, acc_event_info *event_info, - acc_api_info *api_info, - struct async_prof_callback_info *prev_info) -{ - struct async_prof_callback_info *info = malloc (sizeof (*info)); - - info->prof_info = *prof_info; - info->event_info = *event_info; - info->api_info = *api_info; - info->start_info = prev_info; - - devicep->openacc.async.queue_callback_func (aq, async_prof_dispatch, - (void *) info); - return info; -} - /* Launch a possibly offloaded function with FLAGS. FN is the host fn address. MAPNUM, HOSTADDRS, SIZES & KINDS describe the memory blocks to be copied to/from the device. Varadic arguments are @@ -340,8 +284,6 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *), unsigned dims[GOMP_DIM_MAX]; unsigned tag; struct goacc_ncarray_info *nca_info = NULL; - struct async_prof_callback_info *comp_start_info = NULL, - *data_start_info = NULL; #ifdef HAVE_INTTYPES_H gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n", @@ -403,8 +345,31 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *), api_info.async_handle = NULL; } + if (profiling_p) + goacc_profiling_dispatch (&prof_info, &compute_construct_event_info, + &api_info); + handle_ftn_pointers (mapnum, hostaddrs, sizes, kinds); + /* Host fallback if "if" clause is false or if the current device is set to + the host. */ + if (flags & GOACC_FLAG_HOST_FALLBACK) + { + prof_info.device_type = acc_device_host; + api_info.device_type = prof_info.device_type; + goacc_save_and_set_bind (acc_device_host); + fn (hostaddrs); + goacc_restore_bind (); + goto out_prof; + } + else if (acc_device_type (acc_dev->type) == acc_device_host) + { + fn (hostaddrs); + goto out_prof; + } + else if (profiling_p) + api_info.device_api = acc_device_api_cuda; + /* Default: let the runtime choose. */ for (i = 0; i != GOMP_DIM_MAX; i++) dims[i] = 0; @@ -437,12 +402,11 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *), if (async == GOMP_LAUNCH_OP_MAX) async = va_arg (ap, unsigned); - /* Set async number in profiling data, unless the device is the - host or we're doing host fallback. */ - if (profiling_p - && !(flags & GOACC_FLAG_HOST_FALLBACK) - && acc_device_type (acc_dev->type) != acc_device_host) - prof_info.async = prof_info.async_queue = async; + if (profiling_p) + { + prof_info.async = async; + prof_info.async_queue = prof_info.async; + } break; } @@ -470,39 +434,6 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *), va_end (ap); - goacc_aq aq = get_goacc_asyncqueue (async); - - if (profiling_p) - { - if (aq) - comp_start_info - = queue_async_prof_dispatch (acc_dev, aq, &prof_info, - &compute_construct_event_info, - &api_info, NULL); - else - goacc_profiling_dispatch (&prof_info, &compute_construct_event_info, - &api_info); - } - - /* Host fallback if "if" clause is false or if the current device is set to - the host. */ - if (flags & GOACC_FLAG_HOST_FALLBACK) - { - prof_info.device_type = acc_device_host; - api_info.device_type = prof_info.device_type; - goacc_save_and_set_bind (acc_device_host); - fn (hostaddrs); - goacc_restore_bind (); - goto out_prof; - } - else if (acc_device_type (acc_dev->type) == acc_device_host) - { - fn (hostaddrs); - goto out_prof; - } - else if (profiling_p) - api_info.device_api = acc_device_api_cuda; - if (!(acc_dev->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)) { k.host_start = (uintptr_t) fn; @@ -531,16 +462,12 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *), = compute_construct_event_info.other_event.parent_construct; enter_exit_data_event_info.other_event.implicit = 1; enter_exit_data_event_info.other_event.tool_info = NULL; - if (aq) - data_start_info - = queue_async_prof_dispatch (acc_dev, aq, &prof_info, - &enter_exit_data_event_info, &api_info, - NULL); - else - goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, - &api_info); + goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, + &api_info); } + goacc_aq aq = get_goacc_asyncqueue (async); + tgt = gomp_map_vars_openacc (acc_dev, aq, mapnum, hostaddrs, sizes, kinds, nca_info); free (nca_info); @@ -550,13 +477,8 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *), prof_info.event_type = acc_ev_enter_data_end; enter_exit_data_event_info.other_event.event_type = prof_info.event_type; - if (aq) - queue_async_prof_dispatch (acc_dev, aq, &prof_info, - &enter_exit_data_event_info, &api_info, - data_start_info); - else - goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, - &api_info); + goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, + &api_info); } devaddrs = gomp_alloca (sizeof (void *) * mapnum); @@ -575,14 +497,8 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *), prof_info.event_type = acc_ev_exit_data_start; enter_exit_data_event_info.other_event.event_type = prof_info.event_type; enter_exit_data_event_info.other_event.tool_info = NULL; - if (aq) - data_start_info - = queue_async_prof_dispatch (acc_dev, aq, &prof_info, - &enter_exit_data_event_info, &api_info, - NULL); - else - goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, - &api_info); + goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, + &api_info); } /* If running synchronously (aq == NULL), this will unmap immediately. */ @@ -592,13 +508,8 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *), { prof_info.event_type = acc_ev_exit_data_end; enter_exit_data_event_info.other_event.event_type = prof_info.event_type; - if (aq) - queue_async_prof_dispatch (acc_dev, aq, &prof_info, - &enter_exit_data_event_info, &api_info, - data_start_info); - else - goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, - &api_info); + goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, + &api_info); } out_prof: @@ -607,13 +518,8 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *), prof_info.event_type = acc_ev_compute_construct_end; compute_construct_event_info.other_event.event_type = prof_info.event_type; - if (aq) - queue_async_prof_dispatch (acc_dev, aq, &prof_info, - &compute_construct_event_info, &api_info, - comp_start_info); - else - goacc_profiling_dispatch (&prof_info, &compute_construct_event_info, - &api_info); + goacc_profiling_dispatch (&prof_info, &compute_construct_event_info, + &api_info); thr->prof_info = NULL; thr->api_info = NULL; @@ -851,8 +757,6 @@ GOACC_update (int flags_m, size_t mapnum, struct goacc_thread *thr = goacc_thread (); struct gomp_device_descr *acc_dev = thr->dev; - goacc_aq aq = NULL; - struct async_prof_callback_info *update_start_info = NULL; bool profiling_p = GOACC_PROFILING_DISPATCH_P (true); @@ -902,15 +806,7 @@ GOACC_update (int flags_m, size_t mapnum, } if (profiling_p) - { - aq = get_goacc_asyncqueue (async); - if (aq) - update_start_info - = queue_async_prof_dispatch (acc_dev, aq, &prof_info, - &update_event_info, &api_info, NULL); - else - goacc_profiling_dispatch (&prof_info, &update_event_info, &api_info); - } + goacc_profiling_dispatch (&prof_info, &update_event_info, &api_info); if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) || (flags & GOACC_FLAG_HOST_FALLBACK)) @@ -997,11 +893,7 @@ GOACC_update (int flags_m, size_t mapnum, { prof_info.event_type = acc_ev_update_end; update_event_info.other_event.event_type = prof_info.event_type; - if (aq) - queue_async_prof_dispatch (acc_dev, aq, &prof_info, &update_event_info, - &api_info, update_start_info); - else - goacc_profiling_dispatch (&prof_info, &update_event_info, &api_info); + goacc_profiling_dispatch (&prof_info, &update_event_info, &api_info); thr->prof_info = NULL; thr->api_info = NULL; diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c index a33fac7556c..91b373216c9 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c @@ -172,10 +172,7 @@ static void cb_compute_construct_start (acc_prof_info *prof_info, acc_event_info assert (prof_info->device_type == acc_device_type); assert (prof_info->device_number == acc_device_num); assert (prof_info->thread_id == -1); - if (acc_device_type == acc_device_host) - assert (prof_info->async == acc_async_sync); - else - assert (prof_info->async == acc_async); + assert (prof_info->async == /* TODO acc_async */ acc_async_sync); assert (prof_info->async_queue == prof_info->async); assert (prof_info->src_file == NULL); assert (prof_info->func_name == NULL); diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c index 663f7f724d5..28a47ccc27d 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c @@ -316,9 +316,9 @@ static void cb_exit_data_start (acc_prof_info *prof_info, acc_event_info *event_ { DEBUG_printf ("%s\n", __FUNCTION__); - assert (state == 5 + assert (state == 7 #if ASYNC_EXIT_DATA - || state == 105 + || state == 107 #endif ); STATE_OP (state, ++); @@ -372,9 +372,9 @@ static void cb_exit_data_end (acc_prof_info *prof_info, acc_event_info *event_in { DEBUG_printf ("%s\n", __FUNCTION__); - assert (state == 6 + assert (state == 8 #if ASYNC_EXIT_DATA - || state == 106 + || state == 108 #endif ); STATE_OP (state, ++); @@ -458,10 +458,7 @@ static void cb_compute_construct_start (acc_prof_info *prof_info, acc_event_info assert (prof_info->device_type == acc_device_type); assert (prof_info->device_number == acc_device_num); assert (prof_info->thread_id == -1); - if (acc_device_type == acc_device_host) - assert (prof_info->async == acc_async_sync); - else - assert (prof_info->async == acc_async); + assert (prof_info->async == /* TODO acc_async */ acc_async_sync); assert (prof_info->async_queue == prof_info->async); assert (prof_info->src_file == NULL); assert (prof_info->func_name == NULL); @@ -502,6 +499,9 @@ static void cb_compute_construct_end (acc_prof_info *prof_info, acc_event_info * { /* Compensate for the missing 'acc_ev_enter_data_end'. */ state += 1; + /* Compensate for the missing 'acc_ev_enqueue_launch_start' and + 'acc_ev_enqueue_launch_end'. */ + state += 2; /* Compensate for the missing 'acc_ev_exit_data_start' and 'acc_ev_exit_data_end'. */ state += 2; @@ -514,8 +514,8 @@ static void cb_compute_construct_end (acc_prof_info *prof_info, acc_event_info * state += 2; } #endif - assert (state == 7 - || state == 107); + assert (state == 9 + || state == 109); STATE_OP (state, ++); assert (tool_info != NULL); @@ -569,6 +569,17 @@ static void cb_enqueue_launch_start (acc_prof_info *prof_info, acc_event_info *e assert (acc_device_type != acc_device_host); + assert (state == 5 + || state == 105); + STATE_OP (state, ++); + + assert (tool_info != NULL); + assert (tool_info->event_info.other_event.event_type == acc_ev_compute_construct_start); + assert (tool_info->nested == NULL); + tool_info->nested = (struct tool_info *) malloc(sizeof *tool_info); + assert (tool_info->nested != NULL); + tool_info->nested->nested = NULL; + assert (prof_info->event_type == acc_ev_enqueue_launch_start); assert (prof_info->valid_bytes == _ACC_PROF_INFO_VALID_BYTES); assert (prof_info->version == _ACC_PROF_INFO_VERSION); @@ -612,6 +623,13 @@ static void cb_enqueue_launch_start (acc_prof_info *prof_info, acc_event_info *e assert (api_info->device_handle == NULL); assert (api_info->context_handle == NULL); assert (api_info->async_handle == NULL); + + tool_info->nested->event_info.launch_event.event_type = event_info->launch_event.event_type; + tool_info->nested->event_info.launch_event.kernel_name = strdup (event_info->launch_event.kernel_name); + tool_info->nested->event_info.launch_event.num_gangs = event_info->launch_event.num_gangs; + tool_info->nested->event_info.launch_event.num_workers = event_info->launch_event.num_workers; + tool_info->nested->event_info.launch_event.vector_length = event_info->launch_event.vector_length; + event_info->other_event.tool_info = tool_info->nested; } static void cb_enqueue_launch_end (acc_prof_info *prof_info, acc_event_info *event_info, acc_api_info *api_info) @@ -620,6 +638,19 @@ static void cb_enqueue_launch_end (acc_prof_info *prof_info, acc_event_info *eve assert (acc_device_type != acc_device_host); + assert (state == 6 + || state == 106); + STATE_OP (state, ++); + + assert (tool_info != NULL); + assert (tool_info->event_info.other_event.event_type == acc_ev_compute_construct_start); + assert (tool_info->nested != NULL); + assert (tool_info->nested->event_info.launch_event.event_type == acc_ev_enqueue_launch_start); + assert (tool_info->nested->event_info.launch_event.kernel_name != NULL); + assert (tool_info->nested->event_info.launch_event.num_gangs >= 1); + assert (tool_info->nested->event_info.launch_event.num_workers >= 1); + assert (tool_info->nested->event_info.launch_event.vector_length >= 1); + assert (prof_info->event_type == acc_ev_enqueue_launch_end); assert (prof_info->valid_bytes == _ACC_PROF_INFO_VALID_BYTES); assert (prof_info->version == _ACC_PROF_INFO_VERSION); @@ -639,7 +670,12 @@ static void cb_enqueue_launch_end (acc_prof_info *prof_info, acc_event_info *eve assert (event_info->launch_event.valid_bytes == _ACC_LAUNCH_EVENT_INFO_VALID_BYTES); assert (event_info->launch_event.parent_construct == acc_construct_parallel); assert (event_info->launch_event.implicit == 1); + assert (event_info->launch_event.tool_info == tool_info->nested); assert (event_info->launch_event.kernel_name != NULL); + assert (strcmp (event_info->launch_event.kernel_name, tool_info->nested->event_info.launch_event.kernel_name) == 0); + assert (event_info->launch_event.num_gangs == tool_info->nested->event_info.launch_event.num_gangs); + assert (event_info->launch_event.num_workers == tool_info->nested->event_info.launch_event.num_workers); + assert (event_info->launch_event.vector_length == tool_info->nested->event_info.launch_event.vector_length); if (acc_device_type == acc_device_host) assert (api_info->device_api == acc_device_api_none); @@ -653,6 +689,10 @@ static void cb_enqueue_launch_end (acc_prof_info *prof_info, acc_event_info *eve assert (api_info->device_handle == NULL); assert (api_info->context_handle == NULL); assert (api_info->async_handle == NULL); + + free ((void *) tool_info->nested->event_info.launch_event.kernel_name); + free (tool_info->nested); + tool_info->nested = NULL; } @@ -685,7 +725,7 @@ int main() } assert (state_init == 4); } - assert (state == 8); + assert (state == 10); STATE_OP (state, = 100); @@ -702,7 +742,7 @@ int main() #pragma acc wait assert (state_init == 104); } - assert (state == 108); + assert (state == 110); return 0; } -- 2.25.1