Hi Julian! On 2020-07-09T17:06:59-0700, Julian Brown <jul...@codesourcery.com> wrote: > This patch adjusts how dynamic reference counts work so that they match > the semantics of the source program more closely, instead of representing > "excess" reference counts beyond those that represent pointers in the > internal libgomp splay-tree data structure. This allows some corner > cases to be handled more gracefully.
Thanks! > OK? Please squeeze in the incremental patch attached; don't need to re-test, I've done that. With that, OK for master and releases/gcc-10 branches. A few comments (for later reference etc.): > --- a/libgomp/oacc-mem.c > +++ b/libgomp/oacc-mem.c > @@ -1067,18 +1072,144 @@ goacc_enter_data_internal (struct gomp_device_descr > *acc_dev, size_t mapnum, > void **hostaddrs, size_t *sizes, > unsigned short *kinds, goacc_aq aq) > { > + gomp_mutex_lock (&acc_dev->lock); > + > for (size_t i = 0; i < mapnum; i++) > { > - int group_last = find_group_last (i, mapnum, sizes, kinds); > + splay_tree_key n; > + size_t group_last = find_group_last (i, mapnum, sizes, kinds); > + bool struct_p = false; > + size_t size, groupnum = (group_last - i) + 1; > + > + switch (kinds[i] & 0xff) > + { > + case GOMP_MAP_STRUCT: > + { > + size = (uintptr_t) hostaddrs[group_last] + sizes[group_last] > + - (uintptr_t) hostaddrs[i]; > + struct_p = true; > + } > + break; > + > + case GOMP_MAP_ATTACH: > + size = sizeof (void *); > + break; > + > + default: > + size = sizes[i]; > + } > > - gomp_map_vars_async (acc_dev, aq, > - (group_last - i) + 1, > - &hostaddrs[i], NULL, > - &sizes[i], &kinds[i], true, > - GOMP_MAP_VARS_OPENACC_ENTER_DATA); > + n = lookup_host (acc_dev, hostaddrs[i], size); > + > + if (n && struct_p) > + { > + for (size_t j = i + 1; j <= group_last; j++) > + { > + struct splay_tree_key_s cur_node; > + cur_node.host_start = (uintptr_t) hostaddrs[j]; > + cur_node.host_end = cur_node.host_start + sizes[j]; > + splay_tree_key n2 > + = splay_tree_lookup (&acc_dev->mem_map, &cur_node); > + if (!n2 > + || n2->tgt != n->tgt > + || n2->host_start - n->host_start > + != n2->tgt_offset - n->tgt_offset) > + { > + gomp_mutex_unlock (&acc_dev->lock); > + gomp_fatal ("Trying to map into device [%p..%p) structure " > + "element when other mapped elements from the " > + "same structure weren't mapped together with " > + "it", (void *) cur_node.host_start, > + (void *) cur_node.host_end); > + } > + } Ah, OK, that was missing before, thanks. (There obviously is overlap with 'libgomp/target.c:gomp_map_fields_existing'; maybe we can de-duplicate that later.) This checking fixes the issue I'd raised with the new 'libgomp.oacc-c-c++-common/struct-3-1-1.c': "after '#pragma acc enter data create(s.a)' we're no longer refusing '#pragma acc enter data create(s.b)'". > + /* This is a special case because we must increment the refcount by > + the number of mapped struct elements, rather than by one. */ Thanks. > + if (n->refcount != REFCOUNT_INFINITY) > + n->refcount += groupnum - 1; > + n->dynamic_refcount += groupnum - 1; > + } > + else if (n && groupnum == 1) > + { > + void *h = hostaddrs[i]; > + size_t s = sizes[i]; > + > + /* A standalone attach clause. */ > + if ((kinds[i] & 0xff) == GOMP_MAP_ATTACH) > + gomp_attach_pointer (acc_dev, aq, &acc_dev->mem_map, n, > + (uintptr_t) h, s, NULL); > + > + goacc_map_var_existing (acc_dev, h, s, aq, n); ACK. Just also need to remove 'aq' (see incremental patch attached). > + } > + else if (n && groupnum > 1) > + { > + assert (n->refcount != REFCOUNT_INFINITY > + && n->refcount != REFCOUNT_LINK); > + > + for (size_t j = i + 1; j <= group_last; j++) > + if ((kinds[j] & 0xff) == GOMP_MAP_ATTACH) > + { > + splay_tree_key m > + = lookup_host (acc_dev, hostaddrs[j], sizeof (void *)); > + gomp_attach_pointer (acc_dev, aq, &acc_dev->mem_map, m, > + (uintptr_t) hostaddrs[j], sizes[j], NULL); > + } (For possibly later reference, quoting my earlier comment: "Per the earlier '[OpenACC] GOMP_MAP_ATTACH handling in find_group_last', we should never have more than one 'GOMP_MAP_ATTACH' following something else (right?), but it's still OK to leave this in this generic form -- unless you want to add some 'assert'ing here.") > + > + bool processed = false; > + > + struct target_mem_desc *tgt = n->tgt; > + for (size_t j = 0; j < tgt->list_count; j++) > + if (tgt->list[j].key == n) > + { > + /* We are processing a group of mappings (e.g. > + [GOMP_MAP_TO, GOMP_MAP_TO_PSET, GOMP_MAP_POINTER]). > + Find the right group in the target_mem_desc's variable > + list, and increment the refcounts for each item in that > + group. */ Thanks. > + for (size_t k = 0; k < groupnum; k++) > + if (j + k < tgt->list_count && tgt->list[j + k].key) > + { > + tgt->list[j + k].key->refcount++; > + tgt->list[j + k].key->dynamic_refcount++; > + } > + processed = true; > + break; > + } So the 'break' added here (compared to the last version) means that there's always only one such "group of mappings" that needs to be handled. > + > + if (!processed) > + { > + gomp_mutex_unlock (&acc_dev->lock); > + gomp_fatal ("dynamic refcount incrementing failed for " > + "pointer/pset"); > + } > + } > + else if (hostaddrs[i]) > + { > + /* The data is not mapped already. Map it now, unless the first > + member in the group has a NULL pointer (e.g. a non-present > + optional parameter). */ Thanks. The handling of case: not present, and 'hostaddrs[i] == NULL' still seems a bit "bumpy" to me, but we can refine that later, if necessary. (For possibly later reference, quoting my earlier comment: "[this case] is exercised by 'libgomp.oacc-fortran/optional-data-enter-exit.f90' (only)". > + gomp_mutex_unlock (&acc_dev->lock); > + > + struct target_mem_desc *tgt > + = gomp_map_vars_async (acc_dev, aq, groupnum, &hostaddrs[i], NULL, > + &sizes[i], &kinds[i], true, > + GOMP_MAP_VARS_ENTER_DATA); > + assert (tgt); > + > + gomp_mutex_lock (&acc_dev->lock); > + > + for (size_t j = 0; j < tgt->list_count; j++) > + { > + n = tgt->list[j].key; > + if (n) > + n->dynamic_refcount++; > + } > + } > > i = group_last; > } > + > + gomp_mutex_unlock (&acc_dev->lock); > } > @@ -1165,54 +1286,7 @@ goacc_exit_data_internal (struct gomp_device_descr > *acc_dev, size_t mapnum, > if (n == NULL) > continue; > > -[refcounting] > -[copyfrom] > -['is_tgt_unmapped' checking] > + goacc_exit_datum_1 (acc_dev, hostaddrs[i], size, kind, n, aq); ACK, thanks. :-) > --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr92843-1.c > +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr92843-1.c > @@ -138,12 +138,8 @@ test_acc_data () > fprintf (stderr, "CheCKpOInT1\n"); > // { dg-output ".*CheCKpOInT1(\n|\r\n|\r)" } > acc_copyout_finalize (h, sizeof h); > - //TODO goacc_exit_datum: Assertion `is_tgt_unmapped || num_mappings > > 1' failed. > - //TODO { dg-output ".*\[Aa\]ssert.*is_tgt_unmapped" { target { ! > openacc_host_selected } } } ! Scan for what we expect in the "XFAILed" case > (without actually XFAILing). > - //TODO { dg-shouldfail "XFAILed" { ! openacc_host_selected } } ! ... > instead of 'dg-xfail-run-if' so that 'dg-output' is evaluated at all. > - //TODO { dg-final { if { [dg-process-target { xfail { ! > openacc_host_selected } }] == "F" } { xfail "[testname-for-summary] really is > XFAILed" } } } ! ... so that we still get an XFAIL visible in the log. > fprintf (stderr, "CheCKpOInT2\n"); > - // { dg-output ".CheCKpOInT2(\n|\r\n|\r)" { target { > openacc_host_selected } } } > + // { dg-output ".CheCKpOInT2(\n|\r\n|\r)" } > assert (acc_is_present (h, sizeof h)); > verify_array (h, N, c1); This 'dg-output' checking now fails; the output is: CheCKpOInT1 CheCKpOInT2 ..., that is, no character before 'CheCKpOInT2' (previously a benign typo in that 'dg-output', originating from matching what Fortran 'print' prints). As there is no point in verifying these checkpoints if the item in between now no longer fails, we should just complete remove them, like I had done in <87tuyoy5bb.fsf@euler.schwinge.homeip.net">http://mid.mail-archive.com/87tuyoy5bb.fsf@euler.schwinge.homeip.net> (see attached incremental patch). > --- a/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-6.f90 > +++ b/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-6.f90 > @@ -40,15 +40,7 @@ program dtype > if (.not. acc_is_present(var%a(5:n - 5))) stop 11 > if (.not. acc_is_present(var%b(5:n - 5))) stop 12 > if (.not. acc_is_present(var)) stop 13 > - print *, "CheCKpOInT1" > - ! { dg-output ".*CheCKpOInT1(\n|\r\n|\r)" } > !$acc exit data copyout(var%a(5:n - 5), var%b(5:n - 5)) finalize > - !TODO goacc_exit_data_internal: Assertion `is_tgt_unmapped || > num_mappings > 1' failed. > - !TODO { dg-output ".*\[Aa\]ssert.*is_tgt_unmapped" { target { ! > openacc_host_selected } } } ! Scan for what we expect in the "XFAILed" case > (without actually XFAILing). > - !TODO { dg-shouldfail "XFAILed" { ! openacc_host_selected } } ! ... > instead of 'dg-xfail-run-if' so that 'dg-output' is evaluated at all. > - !TODO { dg-final { if { [dg-process-target { xfail { ! > openacc_host_selected } }] == "F" } { xfail "[testname-for-summary] really is > XFAILed" } } } ! ... so that we still get an XFAIL visible in the log. > - print *, "CheCKpOInT2" > - ! { dg-output ".CheCKpOInT2(\n|\r\n|\r)" { target { openacc_host_selected > } } } > if (acc_get_device_type() .ne. acc_device_host) then > if (acc_is_present(var%a(5:n - 5))) stop 21 > if (acc_is_present(var%b(5:n - 5))) stop 22 ACK, but also need to adjust 'libgomp.oacc-fortran/deep-copy-6-no_finalize.F90' (see attached incremental patch), or turn it into past tense if you like that better maybe? > --- /dev/null > +++ b/libgomp/testsuite/libgomp.oacc-fortran/dynamic-incr-structural-1.f90 > +[...] > + if (.not.acc_is_present (a)) stop 1 Needs to be skipped for shared-memory devices (see incremental patch attached). > --- a/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-1-2.F90 > +++ b/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-1-2.F90 > @@ -4,6 +4,3 @@ > /* Nullify the 'finalize' clause, which disturbs reference counting. */ That comment now no longer applies; either adjust it as I had in <87k0zozmp9.fsf@euler.schwinge.homeip.net">http://mid.mail-archive.com/87k0zozmp9.fsf@euler.schwinge.homeip.net> (see attached incrememntal patch), or turn it into past tense if you like that better maybe? > #define finalize > #include "mdc-refcount-1-1-1.f90" > - > -! { dg-output ".*CheCKpOInT1(\n|\r\n|\r)" } > -! { dg-output ".CheCKpOInT2(\n|\r\n|\r)" } (ACK.) > --- a/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-4-1.f90 > +++ b/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-4-1.f90 > @@ -26,8 +26,7 @@ program main > print *, "CheCKpOInT1" > ! { dg-output ".*CheCKpOInT1(\n|\r\n|\r)" } > !$acc exit data detach(var%a) finalize > - !TODO goacc_exit_data_internal: Assertion `is_tgt_unmapped || > num_mappings > 1' failed. > - !TODO { dg-output ".*\[Aa\]ssert.*is_tgt_unmapped" { target { ! > openacc_host_selected } } } ! Scan for what we expect in the "XFAILed" case > (without actually XFAILing). > + !TODO { dg-output ".*libgomp: attach count underflow(\n|\r\n|\r)$" { > target { ! openacc_host_selected } } } ! Scan for what we expect in the > "XFAILed" case (without actually XFAILing). > !TODO { dg-shouldfail "XFAILed" { ! openacc_host_selected } } ! ... > instead of 'dg-xfail-run-if' so that 'dg-output' is evaluated at all. > !TODO { dg-final { if { [dg-process-target { xfail { ! > openacc_host_selected } }] == "F" } { xfail "[testname-for-summary] really is > XFAILed" } } } ! ... so that we still get an XFAIL visible in the log. > print *, "CheCKpOInT2" To match what's changed/what's now actually happening, let's use what I had in <87k0zozmp9.fsf@euler.schwinge.homeip.net">http://mid.mail-archive.com/87k0zozmp9.fsf@euler.schwinge.homeip.net> (see attached incrememntal patch). Grüße Thomas ----------------- Mentor Graphics (Deutschland) GmbH, Arnulfstraße 201, 80634 München / Germany Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Alexander Walter
>From 2c037be969c4bd2a3c9b1f0f5778483dd8e9a8c6 Mon Sep 17 00:00:00 2001 From: Thomas Schwinge <tho...@codesourcery.com> Date: Fri, 10 Jul 2020 13:45:30 +0200 Subject: [PATCH] into "openacc: Adjust dynamic reference count semantics" --- libgomp/oacc-mem.c | 2 +- libgomp/testsuite/libgomp.oacc-c-c++-common/pr92843-1.c | 5 ----- .../libgomp.oacc-fortran/deep-copy-6-no_finalize.F90 | 5 +---- .../libgomp.oacc-fortran/dynamic-incr-structural-1.f90 | 1 + .../testsuite/libgomp.oacc-fortran/mdc-refcount-1-1-2.F90 | 2 +- .../testsuite/libgomp.oacc-fortran/mdc-refcount-1-4-1.f90 | 6 +++--- 6 files changed, 7 insertions(+), 14 deletions(-) diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c index 2d0badfcb2e..855cad84391 100644 --- a/libgomp/oacc-mem.c +++ b/libgomp/oacc-mem.c @@ -1140,7 +1140,7 @@ goacc_enter_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum, gomp_attach_pointer (acc_dev, aq, &acc_dev->mem_map, n, (uintptr_t) h, s, NULL); - goacc_map_var_existing (acc_dev, h, s, aq, n); + goacc_map_var_existing (acc_dev, h, s, n); } else if (n && groupnum > 1) { diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr92843-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr92843-1.c index 2c34c6eef43..db5b35b08d9 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr92843-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr92843-1.c @@ -4,7 +4,6 @@ /* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ #include <assert.h> -#include <stdio.h> #include <stdlib.h> #include <openacc.h> @@ -135,11 +134,7 @@ test_acc_data () assert (acc_is_present (h, sizeof h)); assign_array (h, N, c1); - fprintf (stderr, "CheCKpOInT1\n"); - // { dg-output ".*CheCKpOInT1(\n|\r\n|\r)" } acc_copyout_finalize (h, sizeof h); - fprintf (stderr, "CheCKpOInT2\n"); - // { dg-output ".CheCKpOInT2(\n|\r\n|\r)" } assert (acc_is_present (h, sizeof h)); verify_array (h, N, c1); diff --git a/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-6-no_finalize.F90 b/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-6-no_finalize.F90 index ed4f10e7a3f..038f04a3c37 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-6-no_finalize.F90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-6-no_finalize.F90 @@ -1,8 +1,5 @@ ! { dg-do run } -/* Nullify the 'finalize' clause, which disturbs reference counting. */ +/* Nullify the 'finalize' clause. */ #define finalize #include "deep-copy-6.f90" - -! { dg-output ".*CheCKpOInT1(\n|\r\n|\r)" } -! { dg-output ".CheCKpOInT2(\n|\r\n|\r)" } diff --git a/libgomp/testsuite/libgomp.oacc-fortran/dynamic-incr-structural-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/dynamic-incr-structural-1.f90 index a5b9d948b38..6b17b1dbbc9 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/dynamic-incr-structural-1.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/dynamic-incr-structural-1.f90 @@ -1,4 +1,5 @@ ! { dg-do run } +! { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } program map_multi use openacc diff --git a/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-1-2.F90 b/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-1-2.F90 index d12ea824609..4307f50c46e 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-1-2.F90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-1-2.F90 @@ -1,6 +1,6 @@ ! { dg-do run } ! { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } -/* Nullify the 'finalize' clause, which disturbs reference counting. */ +/* Nullify the 'finalize' clause. */ #define finalize #include "mdc-refcount-1-1-1.f90" diff --git a/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-4-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-4-1.f90 index 0c21a8705ac..fbd52373946 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-4-1.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-4-1.f90 @@ -23,15 +23,15 @@ program main if (.not. acc_is_present(var%a)) stop 1 if (.not. acc_is_present(var)) stop 2 + !$acc exit data detach(var%a) finalize print *, "CheCKpOInT1" ! { dg-output ".*CheCKpOInT1(\n|\r\n|\r)" } - !$acc exit data detach(var%a) finalize - !TODO { dg-output ".*libgomp: attach count underflow(\n|\r\n|\r)$" { target { ! openacc_host_selected } } } ! Scan for what we expect in the "XFAILed" case (without actually XFAILing). + !$acc exit data delete(var%a) + !TODO { dg-output "(\n|\r\n|\r)libgomp: attach count underflow(\n|\r\n|\r)$" { target { ! openacc_host_selected } } } ! Scan for what we expect in the "XFAILed" case (without actually XFAILing). !TODO { dg-shouldfail "XFAILed" { ! openacc_host_selected } } ! ... instead of 'dg-xfail-run-if' so that 'dg-output' is evaluated at all. !TODO { dg-final { if { [dg-process-target { xfail { ! openacc_host_selected } }] == "F" } { xfail "[testname-for-summary] really is XFAILed" } } } ! ... so that we still get an XFAIL visible in the log. print *, "CheCKpOInT2" ! { dg-output ".CheCKpOInT2(\n|\r\n|\r)" { target { openacc_host_selected } } } - !$acc exit data delete(var%a) if (acc_is_present(var%a)) stop 3 if (.not. acc_is_present(var)) stop 4 -- 2.17.1