Hi Julian!
On 2020-06-05T21:31:08+0100, Julian Brown <[email protected]> wrote:
> On Fri, 5 Jun 2020 13:17:09 +0200
> Thomas Schwinge <[email protected]> wrote:
>> On 2019-12-17T21:03:47-0800, Julian Brown <[email protected]>
>> wrote:
>> > This part contains the libgomp runtime support for the
>> > GOMP_MAP_ATTACH and GOMP_MAP_DETACH mapping kinds
>>
>> > --- a/libgomp/target.c
>> > +++ b/libgomp/target.c
>>
>> > @@ -1203,6 +1211,32 @@ gomp_map_vars_internal (struct gomp_device_descr
>> > *devicep,
>>
>> > + case GOMP_MAP_ATTACH:
>> > + {
>> > + cur_node.host_start = (uintptr_t) hostaddrs[i];
>> > + cur_node.host_end = cur_node.host_start + sizeof (void *);
>> > + splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
>> > + if (n != NULL)
>> > + {
>> > + tgt->list[i].key = n;
>> > + tgt->list[i].offset = cur_node.host_start - n->host_start;
>> > + tgt->list[i].length = n->host_end - n->host_start;
>> > + tgt->list[i].copy_from = false;
>> > + tgt->list[i].always_copy_from = false;
>> > + tgt->list[i].do_detach
>> > + = (pragma_kind != GOMP_MAP_VARS_OPENACC_ENTER_DATA);
>> > + n->refcount++;
>> > + }
>> > + else
>> > + {
>> > + gomp_mutex_unlock (&devicep->lock);
>> > + gomp_fatal ("outer struct not mapped for attach");
>> > + }
>> > + gomp_attach_pointer (devicep, aq, mem_map, n,
>> > + (uintptr_t) hostaddrs[i], sizes[i],
>> > + cbufp);
>> > + continue;
>> > + }
>>
>> For the OpenACC runtime API 'acc_attach' etc. routines they don't, so
>> what's the conceptual reason that for the corresponding OpenACC
>> directive variants, 'GOMP_MAP_ATTACH' etc. here participate in
>> reference counting ('n->refcount++' above)? I understand OpenACC
>> 'attach'/'detach' clauses to be simple "executable clauses", which
>> just update some values somewhere (say, like
>> 'GOMP_MAP_ALWAYS_POINTER'), but they don't alter any mapping state,
>> thus wouldn't appear to need reference counting?
>
> IIUC, n->refcount is not directly the "structural reference count" as
> seen at source level, but rather counts the number of target_var_descs
> in the lists appended to each target_mem_desc -- and GOMP_MAP_ATTACH
> have variable entries in those lists.
That may be OK if that's purely an implementation detail that isn't
visible to the user, however:
> That's not the case for the API
> routines.
As I had mentioned, the problem is: in contrast to 'acc_attach', an
OpenACC 'enter data' directive with 'attach' clause currently uses this
same reference-counted code path, and thus such an 'attach' without
corresponding 'detach' inhibits unmapping; see
'libgomp.oacc-c-c++-common/mdc-refcount-1.c' in the attached patch
"OpenACC 'attach'/'detach' has no business affecting user-visible
reference counting".
That patch seemed to be the logical next step then, to unify the code
paths for 'acc_attach' and 'enter data' directive with 'attach' clause
(which have to act in the same way). That's (conceptually) somewhat
similar to what you had proposed as part of
<b23ea71697f77d8214411a3e1348e9dee496e5a6.1590182783.git.julian@codesourcery.com">http://mid.mail-archive.com/b23ea71697f77d8214411a3e1348e9dee496e5a6.1590182783.git.julian@codesourcery.com>.
(But all these things really need to be discussed individually...)
However, that patch regresses
'libgomp.oacc-fortran/deep-copy-6-no_finalize.F90', and also the
'deep-copy-7b2f-2.c', and 'deep-copy-7cf.c' that I'm attaching here. I
have not yet made an attempts to understand these regressions. It may be
that a Detach Action actually effects an (attached) device pointer being
copied back to the host, and then disturbing things -- and if that, then
it may be a bug in libgomp, or in the test case. ;-)
Grüße
Thomas
-----------------
Mentor Graphics (Deutschland) GmbH, Arnulfstraße 201, 80634 München / Germany
Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Alexander
Walter
>From d99a701387054259419292b95462f3646a00d6d9 Mon Sep 17 00:00:00 2001
From: Thomas Schwinge <[email protected]>
Date: Mon, 8 Jun 2020 21:35:32 +0200
Subject: [PATCH] OpenACC 'attach'/'detach' has no business affecting
user-visible reference counting
In particular, an 'attach' without 'detach' must not inhibit unmapping.
libgomp/
* oacc-mem.c (goacc_attach_internal): New function, split out of
'acc_attach_async'.
(acc_attach, goacc_enter_data_internal): Use it.
(goacc_exit_data_internal) <GOMP_MAP_DETACH,
GOMP_MAP_FORCE_DETACH>: Skip unmapping.
* target.c (gomp_map_vars_existing): Assert not 'GOMP_MAP_ATTACH'.
(gomp_map_vars_internal) <GOMP_MAP_ATTACH>: Assert this
is not an 'enter data'.
* testsuite/libgomp.oacc-c-c++-common/mdc-refcount-1.c: New file.
* testsuite/libgomp.oacc-fortran/mdc-refcount-1-4-1.f90: Adjust.
---
libgomp/oacc-mem.c | 51 +++++---
libgomp/target.c | 21 ++-
.../mdc-refcount-1.c | 123 ++++++++++++++++++
.../mdc-refcount-1-4-1.f90 | 7 +-
4 files changed, 176 insertions(+), 26 deletions(-)
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/mdc-refcount-1.c
diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c
index 936ae649dd9..0758f59ec3c 100644
--- a/libgomp/oacc-mem.c
+++ b/libgomp/oacc-mem.c
@@ -881,12 +881,11 @@ acc_update_self_async (void *h, size_t s, int async)
update_dev_host (0, h, s, async);
}
-void
-acc_attach_async (void **hostaddr, int async)
+static void
+goacc_attach_internal (goacc_aq aq, void **hostaddr, size_t bias)
{
struct goacc_thread *thr = goacc_thread ();
struct gomp_device_descr *acc_dev = thr->dev;
- goacc_aq aq = get_goacc_asyncqueue (async);
struct splay_tree_key_s cur_node;
splay_tree_key n;
@@ -907,15 +906,22 @@ acc_attach_async (void **hostaddr, int async)
}
gomp_attach_pointer (acc_dev, aq, &acc_dev->mem_map, n, (uintptr_t) hostaddr,
- 0, NULL);
+ bias, NULL);
gomp_mutex_unlock (&acc_dev->lock);
}
+void
+acc_attach_async (void **hostaddr, int async)
+{
+ goacc_aq aq = get_goacc_asyncqueue (async);
+ goacc_attach_internal (aq, hostaddr, 0);
+}
+
void
acc_attach (void **hostaddr)
{
- acc_attach_async (hostaddr, acc_async_sync);
+ goacc_attach_internal (NULL, hostaddr, 0);
}
static void
@@ -1034,11 +1040,22 @@ goacc_enter_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum,
{
int group_last = find_group_last (i, mapnum, sizes, kinds);
- gomp_map_vars_async (acc_dev, aq,
- (group_last - i) + 1,
- &hostaddrs[i], NULL,
- &sizes[i], &kinds[i], true,
- GOMP_MAP_VARS_OPENACC_ENTER_DATA);
+ unsigned char kind = kinds[i] & 0xff;
+ switch (kind)
+ {
+ case GOMP_MAP_ATTACH:
+ assert (group_last == i);
+ goacc_attach_internal (aq, /*TODO is that type cast alright? */ (void **) hostaddrs[i], sizes[i]);
+ /* Doesn't use reference counting. */
+ break;
+ default:
+ gomp_map_vars_async (acc_dev, aq,
+ (group_last - i) + 1,
+ &hostaddrs[i], NULL,
+ &sizes[i], &kinds[i], true,
+ GOMP_MAP_VARS_OPENACC_ENTER_DATA);
+ break;
+ }
i = group_last;
}
@@ -1094,12 +1111,16 @@ goacc_exit_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum,
bool finalize = false;
if (kind == GOMP_MAP_FORCE_FROM
- || kind == GOMP_MAP_DELETE
- || kind == GOMP_MAP_FORCE_DETACH)
+ || kind == GOMP_MAP_DELETE)
finalize = true;
switch (kind)
{
+ case GOMP_MAP_DETACH:
+ case GOMP_MAP_FORCE_DETACH:
+ /* Handled above; doesn't use reference counting. */
+ break;
+
case GOMP_MAP_FROM:
case GOMP_MAP_FORCE_FROM:
case GOMP_MAP_ALWAYS_FROM:
@@ -1110,14 +1131,10 @@ goacc_exit_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum,
case GOMP_MAP_POINTER:
case GOMP_MAP_DELETE:
case GOMP_MAP_RELEASE:
- case GOMP_MAP_DETACH:
- case GOMP_MAP_FORCE_DETACH:
{
struct splay_tree_key_s cur_node;
size_t size;
- if (kind == GOMP_MAP_POINTER
- || kind == GOMP_MAP_DETACH
- || kind == GOMP_MAP_FORCE_DETACH)
+ if (kind == GOMP_MAP_POINTER)
size = sizeof (void *);
else
size = sizes[i];
diff --git a/libgomp/target.c b/libgomp/target.c
index 36425477dcb..2197067a9a3 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -357,10 +357,12 @@ gomp_map_vars_existing (struct gomp_device_descr *devicep,
splay_tree_key newn, struct target_var_desc *tgt_var,
unsigned char kind, struct gomp_coalesce_buf *cbuf)
{
+ assert (kind != GOMP_MAP_ATTACH);
+
tgt_var->key = oldn;
tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
- tgt_var->do_detach = kind == GOMP_MAP_ATTACH;
+ tgt_var->do_detach = false; //TODO Not 'newn->do_detach', right?
tgt_var->offset = newn->host_start - oldn->host_start;
tgt_var->length = newn->host_end - newn->host_start;
@@ -810,13 +812,15 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
}
else if ((kind & typemask) == GOMP_MAP_ATTACH)
{
+ assert (pragma_kind != GOMP_MAP_VARS_ENTER_DATA
+ && pragma_kind != GOMP_MAP_VARS_OPENACC_ENTER_DATA);
+
tgt->list[i].key = NULL;
has_firstprivate = true;
continue;
}
cur_node.host_start = (uintptr_t) hostaddrs[i];
- if (!GOMP_MAP_POINTER_P (kind & typemask)
- && (kind & typemask) != GOMP_MAP_ATTACH)
+ if (!GOMP_MAP_POINTER_P (kind & typemask))
cur_node.host_end = cur_node.host_start + sizes[i];
else
cur_node.host_end = cur_node.host_start + sizeof (void *);
@@ -1083,6 +1087,9 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
continue;
case GOMP_MAP_ATTACH:
{
+ assert (pragma_kind != GOMP_MAP_VARS_ENTER_DATA
+ && pragma_kind != GOMP_MAP_VARS_OPENACC_ENTER_DATA);
+
cur_node.host_start = (uintptr_t) hostaddrs[i];
cur_node.host_end = cur_node.host_start + sizeof (void *);
splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
@@ -1093,8 +1100,12 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
tgt->list[i].length = n->host_end - n->host_start;
tgt->list[i].copy_from = false;
tgt->list[i].always_copy_from = false;
- tgt->list[i].do_detach
- = (pragma_kind != GOMP_MAP_VARS_OPENACC_ENTER_DATA);
+ tgt->list[i].do_detach = true;
+ /* OpenACC 'attach'/'detach' has no business affecting
+ user-visible reference counting, but the following
+ adjustment of the structured reference counter ('data'
+ construct), this is just an implementation detail,
+ isn't visible to the user. */
n->refcount++;
}
else
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/mdc-refcount-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/mdc-refcount-1.c
new file mode 100644
index 00000000000..d5eb167ca07
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/mdc-refcount-1.c
@@ -0,0 +1,123 @@
+/* Verify that OpenACC 'attach'/'detach' doesn't interfere with reference
+ counting. */
+
+#include <assert.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+/* Need to shared this (and, in particular, implicit '&data_work' in
+ 'attach'/'detach' clauses) between 'test' and 'test_'. */
+static unsigned char *data_work;
+
+static void test_(unsigned variant,
+ unsigned char *data,
+ void *data_d)
+{
+ assert(acc_is_present(&data_work, sizeof data_work));
+ assert(data_work == data);
+
+ acc_update_self(&data_work, sizeof data_work);
+ assert(data_work == data);
+
+ if (variant & 1)
+ {
+#pragma acc enter data attach(data_work)
+ }
+ else
+ acc_attach((void **) &data_work);
+ acc_update_self(&data_work, sizeof data_work);
+ assert(data_work == data_d);
+
+ if (variant & 4)
+ {
+ if (variant & 2)
+ { // attach some more
+ data_work = data;
+ acc_attach((void **) &data_work);
+#pragma acc enter data attach(data_work)
+ acc_attach((void **) &data_work);
+#pragma acc enter data attach(data_work)
+#pragma acc enter data attach(data_work)
+#pragma acc enter data attach(data_work)
+ acc_attach((void **) &data_work);
+ acc_attach((void **) &data_work);
+#pragma acc enter data attach(data_work)
+ }
+ else
+ {}
+ }
+ else
+ { // detach
+ data_work = data;
+ if (variant & 2)
+ {
+#pragma acc exit data detach(data_work)
+ }
+ else
+ acc_detach((void **) &data_work);
+ acc_update_self(&data_work, sizeof data_work);
+ assert(data_work == data);
+
+ // now not attached anymore
+
+#if 0
+ if (TODO)
+ {
+ acc_detach(&data_work); //TODO PR95203 "libgomp: attach count underflow"
+ acc_update_self(&data_work, sizeof data_work);
+ assert(data_work == data);
+ }
+#endif
+ }
+
+ assert(acc_is_present(&data_work, sizeof data_work));
+}
+
+static void test(unsigned variant)
+{
+ const int size = sizeof (void *) + 1; // In sweet memory of PR95270.
+ unsigned char *data = (unsigned char *) malloc(size);
+ assert(data);
+ void *data_d = acc_create(data, size);
+ assert(data_d);
+ assert(acc_is_present(data, size));
+
+ data_work = data;
+
+ if (variant & 8)
+ {
+#pragma acc data copyin(data_work)
+ test_(variant, data, data_d);
+ }
+ else
+ {
+ acc_copyin(&data_work, sizeof data_work);
+ test_(variant, data, data_d);
+ acc_delete(&data_work, sizeof data_work);
+ }
+#if ACC_MEM_SHARED
+ assert(acc_is_present(&data_work, sizeof data_work));
+#else
+ assert(!acc_is_present(&data_work, sizeof data_work));
+#endif
+ data_work = NULL;
+
+ assert(acc_is_present(data, size));
+ acc_delete(data, size);
+ data_d = NULL;
+#if ACC_MEM_SHARED
+ assert(acc_is_present(data, size));
+#else
+ assert(!acc_is_present(data, size));
+#endif
+ free(data);
+ data = NULL;
+}
+
+int main()
+{
+ for (size_t i = 0; i < 16; ++i)
+ test(i);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-4-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-4-1.f90
index b22e411567f..fbd52373946 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-4-1.f90
+++ b/libgomp/testsuite/libgomp.oacc-fortran/mdc-refcount-1-4-1.f90
@@ -23,16 +23,15 @@ program main
if (.not. acc_is_present(var%a)) stop 1
if (.not. acc_is_present(var)) stop 2
+ !$acc exit data detach(var%a) finalize
print *, "CheCKpOInT1"
! { dg-output ".*CheCKpOInT1(\n|\r\n|\r)" }
- !$acc exit data detach(var%a) finalize
- !TODO goacc_exit_data_internal: Assertion `is_tgt_unmapped || num_mappings > 1' failed.
- !TODO { dg-output ".*\[Aa\]ssert.*is_tgt_unmapped" { target { ! openacc_host_selected } } } ! Scan for what we expect in the "XFAILed" case (without actually XFAILing).
+ !$acc exit data delete(var%a)
+ !TODO { dg-output "(\n|\r\n|\r)libgomp: attach count underflow(\n|\r\n|\r)$" { target { ! openacc_host_selected } } } ! Scan for what we expect in the "XFAILed" case (without actually XFAILing).
!TODO { dg-shouldfail "XFAILed" { ! openacc_host_selected } } ! ... instead of 'dg-xfail-run-if' so that 'dg-output' is evaluated at all.
!TODO { dg-final { if { [dg-process-target { xfail { ! openacc_host_selected } }] == "F" } { xfail "[testname-for-summary] really is XFAILed" } } } ! ... so that we still get an XFAIL visible in the log.
print *, "CheCKpOInT2"
! { dg-output ".CheCKpOInT2(\n|\r\n|\r)" { target { openacc_host_selected } } }
- !$acc exit data delete(var%a)
if (acc_is_present(var%a)) stop 3
if (.not. acc_is_present(var)) stop 4
--
2.17.1
/* { dg-do run { target { ! openacc_host_selected } } } */
#include <stdlib.h>
#include <assert.h>
#include <openacc.h>
struct dc
{
int a;
int *b;
};
int
main ()
{
int n = 100, i, j, k;
struct dc v = { .a = 3 };
v.b = (int *) malloc (sizeof (int) * n);
for (k = 0; k < 16; k++)
{
/* Here, we do not explicitly copy the enclosing structure, but work
with fields directly. Make sure attachment counters and reference
counters work properly in that case. */
#pragma acc enter data copyin(v.a, v.b[0:n])
#pragma acc enter data pcopyin(v.b[0:n])
#pragma acc enter data pcopyin(v.b[0:n])
#pragma acc parallel loop present(v.a, v.b)
for (i = 0; i < n; i++)
v.b[i] = v.a + i;
#pragma acc exit data detach(v.b) finalize //NEW
//WORKS
acc_copyout_finalize (v.b, sizeof (int) * n);
#pragma acc exit data delete(v.a)
for (i = 0; i < n; i++)
assert (v.b[i] == v.a + i);
assert (!acc_is_present (&v, sizeof (v)));
assert (!acc_is_present (v.b, sizeof (int) * n));
}
return 0;
}
/* { dg-do run { target { ! openacc_host_selected } } } */
#include <stdlib.h>
#include <assert.h>
#include <openacc.h>
struct dc
{
int a;
int *b;
};
int
main ()
{
int n = 100, i, j, k;
struct dc v = { .a = 3 };
v.b = (int *) malloc (sizeof (int) * n);
for (k = 0; k < 16; k++)
{
/* Here, we do not explicitly copy the enclosing structure, but work
with fields directly. Make sure attachment counters and reference
counters work properly in that case. */
#pragma acc enter data copyin(v.a, v.b[0:n])
#pragma acc enter data pcopyin(v.b[0:n])
#pragma acc enter data pcopyin(v.b[0:n])
#pragma acc parallel loop present(v.a, v.b)
for (i = 0; i < n; i++)
v.b[i] = v.a + i;
#pragma acc exit data detach(v.b) finalize //NEW
acc_copyout_finalize (v.b, sizeof (int) * n);
acc_delete (&v.a, sizeof (v.a));
for (i = 0; i < n; i++)
assert (v.b[i] == v.a + i);
assert (!acc_is_present (&v, sizeof (v)));
assert (!acc_is_present (v.b, sizeof (int) * n));
}
return 0;
}