Hi Thomas,

On 2024/3/15 7:24 PM, Thomas Schwinge wrote:
> Hi Chung-Lin!
> 
> I realized: please add "PR libgomp/92840" to the Git commit log, as your
> changes are directly a continuation of my earlier changes.

Okay, I'll remember to do that.

...
>     -      if (n->refcount != REFCOUNT_INFINITY)
>     +      if (n->refcount != REFCOUNT_INFINITY
>     +   && n->refcount != REFCOUNT_ACC_MAP_DATA)
>       n->refcount--;
>            n->dynamic_refcount--;
>          }
>      
>     +  /* Mappings created by 'acc_map_data' may only be deleted by
>     +     'acc_unmap_data'.  */
>     +  if (n->refcount == REFCOUNT_ACC_MAP_DATA
>     +      && n->dynamic_refcount == 0)
>     +    n->dynamic_refcount = 1;
>     +
>        if (n->refcount == 0)
>          {
>            bool copyout = (kind == GOMP_MAP_FROM
> 
> ..., which really should have the same semantics?  No strong opinion on
> which of the two variants you now chose.

My guess is that breaking off the REFCOUNT_ACC_MAP_DATA case separately will
be lighter on any branch predictors (faster performing overall), so I will
stick with my version here.


>>>
>>> It's not clear to me why you need this handling -- instead of just
>>> handling 'REFCOUNT_ACC_MAP_DATA' like 'REFCOUNT_INFINITY' here, that is,
>>> early 'return'?
>>>
>>> Per my understanding, this code is for OpenACC only exercised for
>>> structured data regions, and it seems strange (unnecessary?) to adjust
>>> the 'dynamic_refcount' for these for 'acc_map_data'-mapped data?  Or am I
>>> missing anything?
>>
>> No, that is not true. It goes through almost everything through 
>> gomp_map_vars_existing/_internal.
>> This is what happens when you acc_create/acc_copyin on a mapping created by 
>> acc_map_data.
> 
> But I don't understand what you foresee breaking with the following (on
> top of your v2):
> 
>     --- a/libgomp/target.c
>     +++ b/libgomp/target.c
>     @@ -476,14 +476,14 @@ gomp_free_device_memory (struct gomp_device_descr 
> *devicep, void *devptr)
>      static inline void
>      gomp_increment_refcount (splay_tree_key k, htab_t *refcount_set)
>      {
>     -  if (k == NULL || k->refcount == REFCOUNT_INFINITY)
>     +  if (k == NULL
>     +      || k->refcount == REFCOUNT_INFINITY
>     +      || k->refcount == REFCOUNT_ACC_MAP_DATA)
>          return;
>      
>        uintptr_t *refcount_ptr = &k->refcount;
>      
>     -  if (k->refcount == REFCOUNT_ACC_MAP_DATA)
>     -    refcount_ptr = &k->dynamic_refcount;
>     -  else if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
>     +  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
>          refcount_ptr = &k->structelem_refcount;
...
> Can you please show a test case?

I have re-tested the patch *without* the gomp_increment/decrement_refcount 
changes,
and have these regressions (just to demonstrate what is affected):
+FAIL: libgomp.oacc-c/../libgomp.oacc-c-c++-common/nested-1.c 
-DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none  -O0  
execution test
+FAIL: libgomp.oacc-c/../libgomp.oacc-c-c++-common/nested-1.c 
-DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none  -O2  
execution test
+FAIL: libgomp.oacc-c/../libgomp.oacc-c-c++-common/pr92854-1.c 
-DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none  -O0  
execution test
+FAIL: libgomp.oacc-c/../libgomp.oacc-c-c++-common/pr92854-1.c 
-DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none  -O2  
execution test
+FAIL: libgomp.oacc-c++/../libgomp.oacc-c-c++-common/nested-1.c 
-DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none  -O0  
execution test
+FAIL: libgomp.oacc-c++/../libgomp.oacc-c-c++-common/nested-1.c 
-DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none  -O2  
execution test
+FAIL: libgomp.oacc-c++/../libgomp.oacc-c-c++-common/pr92854-1.c 
-DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none  -O0  
execution test
+FAIL: libgomp.oacc-c++/../libgomp.oacc-c-c++-common/pr92854-1.c 
-DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none  -O2  
execution test

Now, I have also re-tested your version (aka, just break early and return when 
k->refcount == REFCOUNT_ACC_MAP_DATA)
And for the record, that also works (no regressions).

However, I strongly suggest we use my version here where we adjust the 
dynamic_refcount, simply because: *It is the whole point of this project item 
in OpenACC 2.7*

The 2.7 spec articulated how increment/decrement interacts with 
acc_map_data/acc_unmap_data and this patch was supposed to make libgomp more 
conforming to it implementation-wise.
(otherwise, no point in working on this at all, as there wasn't really anything 
behaviorally wrong about our implementation before)

> I see we already have:
> 
>     if ((kinds[i] & 0xff) == GOMP_MAP_TO_PSET
>         && tgt->list_count == 0)
>       {
>         /* 'declare target'.  */
>         assert (n->refcount == REFCOUNT_INFINITY);
> 
> I think I wanted to you to add:
> 
>     --- a/libgomp/oacc-mem.c
>     +++ b/libgomp/oacc-mem.c
>     @@ -1217,7 +1209,8 @@ goacc_enter_data_internal (struct gomp_device_descr 
> *acc_dev, size_t mapnum,
>             processed = true;
>           }
>         else
>     -     assert (n->refcount != REFCOUNT_INFINITY);
>     +     assert (n->refcount != REFCOUNT_INFINITY
>     +             && n->refcount != REFCOUNT_ACC_MAP_DATA);
>      
>         for (size_t j = 0; j < tgt->list_count; j++)
>           if (tgt->list[j].key == n)

I have added this fragment to the patch, thanks.

> 
> Please check these items, and then we're good to go.

I have attached v3 of this patch, of course re-tested without regressions.
If there are no objections I will commit this before end of week (maybe weekend)

Thanks,
Chung-Lin
diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h
index f98cccd8b66..089393846d1 100644
--- a/libgomp/libgomp.h
+++ b/libgomp/libgomp.h
@@ -1163,6 +1163,8 @@ struct target_mem_desc;
 /* Special value for refcount - tgt_offset contains target address of the
    artificial pointer to "omp declare target link" object.  */
 #define REFCOUNT_LINK     (REFCOUNT_SPECIAL | 1)
+/* Special value for refcount - created through acc_map_data.  */
+#define REFCOUNT_ACC_MAP_DATA (REFCOUNT_SPECIAL | 2)
 
 /* Special value for refcount - structure element sibling list items.
    All such key refounts have REFCOUNT_STRUCTELEM bits set, with _FLAG_FIRST
diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c
index ba48a1ccbb7..d590806b5cd 100644
--- a/libgomp/oacc-mem.c
+++ b/libgomp/oacc-mem.c
@@ -411,7 +411,8 @@ acc_map_data (void *h, void *d, size_t s)
       assert (n->refcount == 1);
       assert (n->dynamic_refcount == 0);
       /* Special reference counting behavior.  */
-      n->refcount = REFCOUNT_INFINITY;
+      n->refcount = REFCOUNT_ACC_MAP_DATA;
+      n->dynamic_refcount = 1;
 
       if (profiling_p)
        {
@@ -455,12 +456,7 @@ acc_unmap_data (void *h)
       gomp_fatal ("[%p,%d] surrounds %p",
                  (void *) n->host_start, (int) host_size, (void *) h);
     }
-  /* TODO This currently doesn't catch 'REFCOUNT_INFINITY' usage different from
-     'acc_map_data'.  Maybe 'dynamic_refcount' can be used for disambiguating
-     the different 'REFCOUNT_INFINITY' cases, or simply separate
-     'REFCOUNT_INFINITY' values per different usage ('REFCOUNT_ACC_MAP_DATA'
-     etc.)?  */
-  else if (n->refcount != REFCOUNT_INFINITY)
+  else if (n->refcount != REFCOUNT_ACC_MAP_DATA)
     {
       gomp_mutex_unlock (&acc_dev->lock);
       gomp_fatal ("refusing to unmap block [%p,+%d] that has not been mapped"
@@ -468,13 +464,12 @@ acc_unmap_data (void *h)
                  (void *) h, (int) host_size);
     }
 
-  struct target_mem_desc *tgt = n->tgt;
+  /* This should hold for all mappings created by acc_map_data. We are however
+     removing this mapping in a "finalize" manner, so dynamic_refcount > 1 does
+     not matter.  */
+  assert (n->dynamic_refcount >= 1);
 
-  if (tgt->refcount == REFCOUNT_INFINITY)
-    {
-      gomp_mutex_unlock (&acc_dev->lock);
-      gomp_fatal ("cannot unmap target block");
-    }
+  struct target_mem_desc *tgt = n->tgt;
 
   /* Above, we've verified that the mapping must have been set up by
      'acc_map_data'.  */
@@ -519,7 +514,8 @@ goacc_map_var_existing (struct gomp_device_descr *acc_dev, 
void *hostaddr,
     }
 
   assert (n->refcount != REFCOUNT_LINK);
-  if (n->refcount != REFCOUNT_INFINITY)
+  if (n->refcount != REFCOUNT_INFINITY
+      && n->refcount != REFCOUNT_ACC_MAP_DATA)
     n->refcount++;
   n->dynamic_refcount++;
 
@@ -683,13 +679,30 @@ goacc_exit_datum_1 (struct gomp_device_descr *acc_dev, 
void *h, size_t s,
 
   assert (n->refcount != REFCOUNT_LINK);
   if (n->refcount != REFCOUNT_INFINITY
+      && n->refcount != REFCOUNT_ACC_MAP_DATA
       && n->refcount < n->dynamic_refcount)
     {
       gomp_mutex_unlock (&acc_dev->lock);
       gomp_fatal ("Dynamic reference counting assert fail\n");
     }
 
-  if (finalize)
+  if (n->refcount == REFCOUNT_ACC_MAP_DATA)
+    {
+      if (finalize)
+       {
+         /* Mappings created by acc_map_data are returned to initial
+            dynamic_refcount of 1. Can only be deleted by acc_unmap_data.  */
+         n->dynamic_refcount = 1;
+       }
+      else if (n->dynamic_refcount)
+       {
+         /* When mapping is created by acc_map_data, dynamic_refcount must be
+            maintained at >= 1.  */
+         if (n->dynamic_refcount > 1)
+           n->dynamic_refcount--;
+       }
+    }
+  else if (finalize)
     {
       if (n->refcount != REFCOUNT_INFINITY)
        n->refcount -= n->dynamic_refcount;
@@ -1131,7 +1144,8 @@ goacc_enter_data_internal (struct gomp_device_descr 
*acc_dev, size_t mapnum,
            }
          /* This is a special case because we must increment the refcount by
             the number of mapped struct elements, rather than by one.  */
-         if (n->refcount != REFCOUNT_INFINITY)
+         if (n->refcount != REFCOUNT_INFINITY
+             && n->refcount != REFCOUNT_ACC_MAP_DATA)
            n->refcount += groupnum - 1;
          n->dynamic_refcount += groupnum - 1;
        }
@@ -1203,7 +1217,8 @@ goacc_enter_data_internal (struct gomp_device_descr 
*acc_dev, size_t mapnum,
              processed = true;
            }
          else
-           assert (n->refcount != REFCOUNT_INFINITY);
+           assert (n->refcount != REFCOUNT_INFINITY
+                   && n->refcount != REFCOUNT_ACC_MAP_DATA);
 
          for (size_t j = 0; j < tgt->list_count; j++)
            if (tgt->list[j].key == n)
diff --git a/libgomp/target.c b/libgomp/target.c
index bcc86051601..c9dcb8761e5 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -481,7 +481,9 @@ gomp_increment_refcount (splay_tree_key k, htab_t 
*refcount_set)
 
   uintptr_t *refcount_ptr = &k->refcount;
 
-  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
+  if (k->refcount == REFCOUNT_ACC_MAP_DATA)
+    refcount_ptr = &k->dynamic_refcount;
+  else if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
     refcount_ptr = &k->structelem_refcount;
   else if (REFCOUNT_STRUCTELEM_P (k->refcount))
     refcount_ptr = k->structelem_refcount_ptr;
@@ -528,7 +530,9 @@ gomp_decrement_refcount (splay_tree_key k, htab_t 
*refcount_set, bool delete_p,
 
   uintptr_t *refcount_ptr = &k->refcount;
 
-  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
+  if (k->refcount == REFCOUNT_ACC_MAP_DATA)
+    refcount_ptr = &k->dynamic_refcount;
+  else if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
     refcount_ptr = &k->structelem_refcount;
   else if (REFCOUNT_STRUCTELEM_P (k->refcount))
     refcount_ptr = k->structelem_refcount_ptr;
@@ -561,6 +565,10 @@ gomp_decrement_refcount (splay_tree_key k, htab_t 
*refcount_set, bool delete_p,
   else if (*refcount_ptr > 0)
     *refcount_ptr -= 1;
 
+  /* Force back to 1 if this is an acc_map_data mapping.  */
+  if (k->refcount == REFCOUNT_ACC_MAP_DATA && *refcount_ptr == 0)
+    *refcount_ptr = 1;
+
  end:
   if (*refcount_ptr == 0)
     {
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-96.c 
b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-96.c
new file mode 100644
index 00000000000..7bc55b94f33
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-96.c
@@ -0,0 +1,36 @@
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "-DACC_MEM_SHARED=1" } } */
+
+/* Test if acc_unmap_data has implicit finalize semantics.  */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  unsigned char *h;
+  void *d;
+
+  h = (unsigned char *) malloc (N);
+
+  d = acc_malloc (N);
+
+  acc_map_data (h, d, N);
+
+  acc_copyin (h, N);
+  acc_copyin (h, N);
+  acc_copyin (h, N);
+
+  acc_unmap_data (h);
+
+  if (acc_is_present (h, N))
+    abort ();
+
+  acc_free (d);
+
+  free (h);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/unmap-infinity-1.c 
b/libgomp/testsuite/libgomp.oacc-c-c++-common/unmap-infinity-1.c
index 872f0c1de5c..9ed9fa7e413 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/unmap-infinity-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/unmap-infinity-1.c
@@ -10,7 +10,7 @@ main (int argc, char *argv[])
 {
   acc_init (acc_device_default);
   acc_unmap_data ((void *) foo);
-/* { dg-output "libgomp: cannot unmap target block" } */
+/* { dg-output "libgomp: refusing to unmap block 
\\\[\[0-9a-fA-FxX\]+,\\\+64\\\] that has not been mapped by 'acc_map_data'" } */
   return 0;
 }
 

Reply via email to