Hi Julian!

On 2020-07-09T17:06:59-0700, Julian Brown <jul...@codesourcery.com> wrote:
> This patch adjusts how dynamic reference counts work so that they match
> the semantics of the source program more closely, instead of representing
> "excess" reference counts beyond those that represent pointers in the
> internal libgomp splay-tree data structure. This allows some corner
> cases to be handled more gracefully.

Thanks!

> OK?

Please squeeze in the incremental patch attached; don't need to re-test,
I've done that.  With that, OK for master and releases/gcc-10 branches.

A few comments (for later reference etc.):

> --- a/libgomp/oacc-mem.c
> +++ b/libgomp/oacc-mem.c

> @@ -1067,18 +1072,144 @@ goacc_enter_data_internal (struct gomp_device_descr 
> *acc_dev, size_t mapnum,
>                          void **hostaddrs, size_t *sizes,
>                          unsigned short *kinds, goacc_aq aq)
>  {
> +  gomp_mutex_lock (&acc_dev->lock);
> +
>    for (size_t i = 0; i < mapnum; i++)
>      {
> -      int group_last = find_group_last (i, mapnum, sizes, kinds);
> +      splay_tree_key n;
> +      size_t group_last = find_group_last (i, mapnum, sizes, kinds);
> +      bool struct_p = false;
> +      size_t size, groupnum = (group_last - i) + 1;
> +
> +      switch (kinds[i] & 0xff)
> +     {
> +     case GOMP_MAP_STRUCT:
> +       {
> +         size = (uintptr_t) hostaddrs[group_last] + sizes[group_last]
> +                - (uintptr_t) hostaddrs[i];
> +         struct_p = true;
> +       }
> +       break;
> +
> +     case GOMP_MAP_ATTACH:
> +       size = sizeof (void *);
> +       break;
> +
> +     default:
> +       size = sizes[i];
> +     }
>
> -      gomp_map_vars_async (acc_dev, aq,
> -                        (group_last - i) + 1,
> -                        &hostaddrs[i], NULL,
> -                        &sizes[i], &kinds[i], true,
> -                        GOMP_MAP_VARS_OPENACC_ENTER_DATA);

> +      n = lookup_host (acc_dev, hostaddrs[i], size);
> +
> +      if (n && struct_p)
> +     {
> +       for (size_t j = i + 1; j <= group_last; j++)
> +         {
> +           struct splay_tree_key_s cur_node;
> +           cur_node.host_start = (uintptr_t) hostaddrs[j];
> +           cur_node.host_end = cur_node.host_start + sizes[j];
> +           splay_tree_key n2
> +             = splay_tree_lookup (&acc_dev->mem_map, &cur_node);
> +           if (!n2
> +               || n2->tgt != n->tgt
> +               || n2->host_start - n->host_start
> +                  != n2->tgt_offset - n->tgt_offset)
> +             {
> +               gomp_mutex_unlock (&acc_dev->lock);
> +               gomp_fatal ("Trying to map into device [%p..%p) structure "
> +                           "element when other mapped elements from the "
> +                           "same structure weren't mapped together with "
> +                           "it", (void *) cur_node.host_start,
> +                           (void *) cur_node.host_end);
> +             }
> +         }

Ah, OK, that was missing before, thanks.  (There obviously is overlap
with 'libgomp/target.c:gomp_map_fields_existing'; maybe we can
de-duplicate that later.)

This checking fixes the issue I'd raised with the new
'libgomp.oacc-c-c++-common/struct-3-1-1.c': "after '#pragma acc enter
data create(s.a)' we're no longer refusing '#pragma acc enter data
create(s.b)'".

> +       /* This is a special case because we must increment the refcount by
> +          the number of mapped struct elements, rather than by one.  */

Thanks.

> +       if (n->refcount != REFCOUNT_INFINITY)
> +         n->refcount += groupnum - 1;
> +       n->dynamic_refcount += groupnum - 1;
> +     }

> +      else if (n && groupnum == 1)
> +     {
> +       void *h = hostaddrs[i];
> +       size_t s = sizes[i];
> +
> +       /* A standalone attach clause.  */
> +       if ((kinds[i] & 0xff) == GOMP_MAP_ATTACH)
> +         gomp_attach_pointer (acc_dev, aq, &acc_dev->mem_map, n,
> +                              (uintptr_t) h, s, NULL);
> +
> +       goacc_map_var_existing (acc_dev, h, s, aq, n);

ACK.  Just also need to remove 'aq' (see incremental patch attached).

> +     }

> +      else if (n && groupnum > 1)
> +     {
> +       assert (n->refcount != REFCOUNT_INFINITY
> +               && n->refcount != REFCOUNT_LINK);
> +
> +       for (size_t j = i + 1; j <= group_last; j++)
> +         if ((kinds[j] & 0xff) == GOMP_MAP_ATTACH)
> +           {
> +             splay_tree_key m
> +               = lookup_host (acc_dev, hostaddrs[j], sizeof (void *));
> +             gomp_attach_pointer (acc_dev, aq, &acc_dev->mem_map, m,
> +                                  (uintptr_t) hostaddrs[j], sizes[j], NULL);
> +           }

(For possibly later reference, quoting my earlier comment: "Per the
earlier '[OpenACC] GOMP_MAP_ATTACH handling in find_group_last', we
should never have more than one 'GOMP_MAP_ATTACH' following something
else (right?), but it's still OK to leave this in this generic form --
unless you want to add some 'assert'ing here.")

> +
> +       bool processed = false;
> +
> +       struct target_mem_desc *tgt = n->tgt;
> +       for (size_t j = 0; j < tgt->list_count; j++)
> +         if (tgt->list[j].key == n)
> +           {
> +             /* We are processing a group of mappings (e.g.
> +                [GOMP_MAP_TO, GOMP_MAP_TO_PSET, GOMP_MAP_POINTER]).
> +                Find the right group in the target_mem_desc's variable
> +                list, and increment the refcounts for each item in that
> +                group.  */

Thanks.

> +             for (size_t k = 0; k < groupnum; k++)
> +               if (j + k < tgt->list_count && tgt->list[j + k].key)
> +                 {
> +                   tgt->list[j + k].key->refcount++;
> +                   tgt->list[j + k].key->dynamic_refcount++;
> +                 }
> +             processed = true;
> +             break;
> +           }

So the 'break' added here (compared to the last version) means that
there's always only one such "group of mappings" that needs to be
handled.

> +
> +       if (!processed)
> +         {
> +           gomp_mutex_unlock (&acc_dev->lock);
> +           gomp_fatal ("dynamic refcount incrementing failed for "
> +                       "pointer/pset");
> +         }
> +     }

> +      else if (hostaddrs[i])
> +     {
> +       /* The data is not mapped already.  Map it now, unless the first
> +          member in the group has a NULL pointer (e.g. a non-present
> +          optional parameter).  */

Thanks.  The handling of case: not present, and 'hostaddrs[i] == NULL'
still seems a bit "bumpy" to me, but we can refine that later, if
necessary.  (For possibly later reference, quoting my earlier comment:
"[this case] is exercised by
'libgomp.oacc-fortran/optional-data-enter-exit.f90' (only)".

> +       gomp_mutex_unlock (&acc_dev->lock);
> +
> +       struct target_mem_desc *tgt
> +         = gomp_map_vars_async (acc_dev, aq, groupnum, &hostaddrs[i], NULL,
> +                                &sizes[i], &kinds[i], true,
> +                                GOMP_MAP_VARS_ENTER_DATA);
> +       assert (tgt);
> +
> +       gomp_mutex_lock (&acc_dev->lock);
> +
> +       for (size_t j = 0; j < tgt->list_count; j++)
> +         {
> +           n = tgt->list[j].key;
> +           if (n)
> +             n->dynamic_refcount++;
> +         }
> +     }
>
>        i = group_last;
>      }
> +
> +  gomp_mutex_unlock (&acc_dev->lock);
>  }

> @@ -1165,54 +1286,7 @@ goacc_exit_data_internal (struct gomp_device_descr 
> *acc_dev, size_t mapnum,
>           if (n == NULL)
>             continue;
>
> -[refcounting]
> -[copyfrom]
> -['is_tgt_unmapped' checking]
> +         goacc_exit_datum_1 (acc_dev, hostaddrs[i], size, kind, n, aq);

ACK, thanks.  :-)

> --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr92843-1.c
> +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr92843-1.c
> @@ -138,12 +138,8 @@ test_acc_data ()
>      fprintf (stderr, "CheCKpOInT1\n");
>      // { dg-output ".*CheCKpOInT1(\n|\r\n|\r)" }
>      acc_copyout_finalize (h, sizeof h);
> -    //TODO     goacc_exit_datum: Assertion `is_tgt_unmapped || num_mappings 
> > 1' failed.
> -    //TODO { dg-output ".*\[Aa\]ssert.*is_tgt_unmapped" { target { ! 
> openacc_host_selected } } } ! Scan for what we expect in the "XFAILed" case 
> (without actually XFAILing).
> -    //TODO { dg-shouldfail "XFAILed" { ! openacc_host_selected } } ! ... 
> instead of 'dg-xfail-run-if' so that 'dg-output' is evaluated at all.
> -    //TODO { dg-final { if { [dg-process-target { xfail { ! 
> openacc_host_selected } }] == "F" } { xfail "[testname-for-summary] really is 
> XFAILed" } } } ! ... so that we still get an XFAIL visible in the log.
>      fprintf (stderr, "CheCKpOInT2\n");
> -    // { dg-output ".CheCKpOInT2(\n|\r\n|\r)" { target { 
> openacc_host_selected } } }
> +    // { dg-output ".CheCKpOInT2(\n|\r\n|\r)" }
>      assert (acc_is_present (h, sizeof h));
>      verify_array (h, N, c1);

This 'dg-output' checking now fails; the output is:

    CheCKpOInT1
    CheCKpOInT2

..., that is, no character before 'CheCKpOInT2' (previously a benign typo
in that 'dg-output', originating from matching what Fortran 'print'
prints).  As there is no point in verifying these checkpoints if the item
in between now no longer fails, we should just complete remove them, like
I had done in
<87tuyoy5bb.fsf@euler.schwinge.homeip.net">http://mid.mail-archive.com/87tuyoy5bb.fsf@euler.schwinge.homeip.net>
(see attached incremental patch).

> --- a/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-6.f90
> +++ b/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-6.f90
> @@ -40,15 +40,7 @@ program dtype
>    if (.not. acc_is_present(var%a(5:n - 5))) stop 11
>    if (.not. acc_is_present(var%b(5:n - 5))) stop 12
>    if (.not. acc_is_present(var)) stop 13
> -  print *, "CheCKpOInT1"
> -  ! { dg-output ".*CheCKpOInT1(\n|\r\n|\r)" }
>  !$acc exit data copyout(var%a(5:n - 5), var%b(5:n - 5)) finalize
> -  !TODO     goacc_exit_data_internal: Assertion `is_tgt_unmapped || 
> num_mappings > 1' failed.
> -  !TODO { dg-output ".*\[Aa\]ssert.*is_tgt_unmapped" { target { ! 
> openacc_host_selected } } } ! Scan for what we expect in the "XFAILed" case 
> (without actually XFAILing).
> -  !TODO { dg-shouldfail "XFAILed" { ! openacc_host_selected } } ! ... 
> instead of 'dg-xfail-run-if' so that 'dg-output' is evaluated at all.
> -  !TODO { dg-final { if { [dg-process-target { xfail { ! 
> openacc_host_selected } }] == "F" } { xfail "[testname-for-summary] really is 
> XFAILed" } } } ! ... so that we still get an XFAIL visible in the log.
> -  print *, "CheCKpOInT2"
> -  ! { dg-output ".CheCKpOInT2(\n|\r\n|\r)" { target { openacc_host_selected 
> } } }
>    if (acc_get_device_type() .ne. acc_device_host) then
>       if (acc_is_present(var%a(5:n - 5))) stop 21
>       if (acc_is_present(var%b(5:n - 5))) stop 22

ACK, but also need to adjust
'libgomp.oacc-fortran/deep-copy-6-no_finalize.F90' (see attached
incremental patch), or turn it into past tense if you like that better
maybe?

> --- /dev/null
> +++ b/libgomp/testsuite/libgomp.oacc-fortran/dynamic-incr-structural-1.f90

> +[...]
> +  if (.not.acc_is_present (a)) stop 1

Needs to be skipped for shared-memory devices (see incremental patch
attached).

> --- a/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-1-2.F90
> +++ b/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-1-2.F90
> @@ -4,6 +4,3 @@
>  /* Nullify the 'finalize' clause, which disturbs reference counting.  */

That comment now no longer applies; either adjust it as I had in
<87k0zozmp9.fsf@euler.schwinge.homeip.net">http://mid.mail-archive.com/87k0zozmp9.fsf@euler.schwinge.homeip.net>
(see attached incrememntal patch), or turn it into past tense if you like
that better maybe?

>  #define finalize
>  #include "mdc-refcount-1-1-1.f90"
> -
> -! { dg-output ".*CheCKpOInT1(\n|\r\n|\r)" }
> -! { dg-output ".CheCKpOInT2(\n|\r\n|\r)" }

(ACK.)

> --- a/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-4-1.f90
> +++ b/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-4-1.f90
> @@ -26,8 +26,7 @@ program main
>    print *, "CheCKpOInT1"
>    ! { dg-output ".*CheCKpOInT1(\n|\r\n|\r)" }
>    !$acc exit data detach(var%a) finalize
> -  !TODO     goacc_exit_data_internal: Assertion `is_tgt_unmapped || 
> num_mappings > 1' failed.
> -  !TODO { dg-output ".*\[Aa\]ssert.*is_tgt_unmapped" { target { ! 
> openacc_host_selected } } } ! Scan for what we expect in the "XFAILed" case 
> (without actually XFAILing).
> +  !TODO { dg-output ".*libgomp: attach count underflow(\n|\r\n|\r)$" { 
> target { ! openacc_host_selected } } } ! Scan for what we expect in the 
> "XFAILed" case (without actually XFAILing).
>    !TODO { dg-shouldfail "XFAILed" { ! openacc_host_selected } } ! ... 
> instead of 'dg-xfail-run-if' so that 'dg-output' is evaluated at all.
>    !TODO { dg-final { if { [dg-process-target { xfail { ! 
> openacc_host_selected } }] == "F" } { xfail "[testname-for-summary] really is 
> XFAILed" } } } ! ... so that we still get an XFAIL visible in the log.
>    print *, "CheCKpOInT2"

To match what's changed/what's now actually happening, let's use what I
had in
<87k0zozmp9.fsf@euler.schwinge.homeip.net">http://mid.mail-archive.com/87k0zozmp9.fsf@euler.schwinge.homeip.net>
(see attached incrememntal patch).


Grüße
 Thomas


-----------------
Mentor Graphics (Deutschland) GmbH, Arnulfstraße 201, 80634 München / Germany
Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Alexander 
Walter
>From 2c037be969c4bd2a3c9b1f0f5778483dd8e9a8c6 Mon Sep 17 00:00:00 2001
From: Thomas Schwinge <tho...@codesourcery.com>
Date: Fri, 10 Jul 2020 13:45:30 +0200
Subject: [PATCH] into "openacc: Adjust dynamic reference count semantics"

---
 libgomp/oacc-mem.c                                          | 2 +-
 libgomp/testsuite/libgomp.oacc-c-c++-common/pr92843-1.c     | 5 -----
 .../libgomp.oacc-fortran/deep-copy-6-no_finalize.F90        | 5 +----
 .../libgomp.oacc-fortran/dynamic-incr-structural-1.f90      | 1 +
 .../testsuite/libgomp.oacc-fortran/mdc-refcount-1-1-2.F90   | 2 +-
 .../testsuite/libgomp.oacc-fortran/mdc-refcount-1-4-1.f90   | 6 +++---
 6 files changed, 7 insertions(+), 14 deletions(-)

diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c
index 2d0badfcb2e..855cad84391 100644
--- a/libgomp/oacc-mem.c
+++ b/libgomp/oacc-mem.c
@@ -1140,7 +1140,7 @@ goacc_enter_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum,
 	    gomp_attach_pointer (acc_dev, aq, &acc_dev->mem_map, n,
 				 (uintptr_t) h, s, NULL);
 
-	  goacc_map_var_existing (acc_dev, h, s, aq, n);
+	  goacc_map_var_existing (acc_dev, h, s, n);
 	}
       else if (n && groupnum > 1)
 	{
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr92843-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr92843-1.c
index 2c34c6eef43..db5b35b08d9 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr92843-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr92843-1.c
@@ -4,7 +4,6 @@
 /* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
 
 #include <assert.h>
-#include <stdio.h>
 #include <stdlib.h>
 #include <openacc.h>
 
@@ -135,11 +134,7 @@ test_acc_data ()
     assert (acc_is_present (h, sizeof h));
 
     assign_array (h, N, c1);
-    fprintf (stderr, "CheCKpOInT1\n");
-    // { dg-output ".*CheCKpOInT1(\n|\r\n|\r)" }
     acc_copyout_finalize (h, sizeof h);
-    fprintf (stderr, "CheCKpOInT2\n");
-    // { dg-output ".CheCKpOInT2(\n|\r\n|\r)" }
     assert (acc_is_present (h, sizeof h));
     verify_array (h, N, c1);
 
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-6-no_finalize.F90 b/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-6-no_finalize.F90
index ed4f10e7a3f..038f04a3c37 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-6-no_finalize.F90
+++ b/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-6-no_finalize.F90
@@ -1,8 +1,5 @@
 ! { dg-do run }
 
-/* Nullify the 'finalize' clause, which disturbs reference counting.  */
+/* Nullify the 'finalize' clause.  */
 #define finalize
 #include "deep-copy-6.f90"
-
-! { dg-output ".*CheCKpOInT1(\n|\r\n|\r)" }
-! { dg-output ".CheCKpOInT2(\n|\r\n|\r)" }
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/dynamic-incr-structural-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/dynamic-incr-structural-1.f90
index a5b9d948b38..6b17b1dbbc9 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/dynamic-incr-structural-1.f90
+++ b/libgomp/testsuite/libgomp.oacc-fortran/dynamic-incr-structural-1.f90
@@ -1,4 +1,5 @@
 ! { dg-do run }
+! { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } }
 
 program map_multi
   use openacc
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-1-2.F90 b/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-1-2.F90
index d12ea824609..4307f50c46e 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-1-2.F90
+++ b/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-1-2.F90
@@ -1,6 +1,6 @@
 ! { dg-do run }
 ! { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } }
 
-/* Nullify the 'finalize' clause, which disturbs reference counting.  */
+/* Nullify the 'finalize' clause.  */
 #define finalize
 #include "mdc-refcount-1-1-1.f90"
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-4-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-4-1.f90
index 0c21a8705ac..fbd52373946 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-4-1.f90
+++ b/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-4-1.f90
@@ -23,15 +23,15 @@ program main
   if (.not. acc_is_present(var%a)) stop 1
   if (.not. acc_is_present(var)) stop 2
 
+  !$acc exit data detach(var%a) finalize
   print *, "CheCKpOInT1"
   ! { dg-output ".*CheCKpOInT1(\n|\r\n|\r)" }
-  !$acc exit data detach(var%a) finalize
-  !TODO { dg-output ".*libgomp: attach count underflow(\n|\r\n|\r)$" { target { ! openacc_host_selected } } } ! Scan for what we expect in the "XFAILed" case (without actually XFAILing).
+  !$acc exit data delete(var%a)
+  !TODO { dg-output "(\n|\r\n|\r)libgomp: attach count underflow(\n|\r\n|\r)$" { target { ! openacc_host_selected } } } ! Scan for what we expect in the "XFAILed" case (without actually XFAILing).
   !TODO { dg-shouldfail "XFAILed" { ! openacc_host_selected } } ! ... instead of 'dg-xfail-run-if' so that 'dg-output' is evaluated at all.
   !TODO { dg-final { if { [dg-process-target { xfail { ! openacc_host_selected } }] == "F" } { xfail "[testname-for-summary] really is XFAILed" } } } ! ... so that we still get an XFAIL visible in the log.
   print *, "CheCKpOInT2"
   ! { dg-output ".CheCKpOInT2(\n|\r\n|\r)" { target { openacc_host_selected } } }
-  !$acc exit data delete(var%a)
   if (acc_is_present(var%a)) stop 3
   if (.not. acc_is_present(var)) stop 4
 
-- 
2.17.1

Reply via email to