This is a new version of the patch last sent here: https://gcc.gnu.org/pipermail/gcc-patches/2020-May/546332.html
Minus the bits that Thomas has committed already (thanks!), and with adjustments to allow for GOMP_MAP_ATTACH being grouped together with a preceding clause. OK? Julian ChangeLog libgomp/ * libgomp.h (struct splay_tree_key_s): Change virtual_refcount to dynamic_refcount. (struct gomp_device_descr): Remove GOMP_MAP_VARS_OPENACC_ENTER_DATA. * oacc-mem.c (acc_map_data): Substitute virtual_refcount for dynamic_refcount. (goacc_enter_datum): Adjust for dynamic_refcount semantics. (goacc_exit_datum): Re-add some error checking. Adjust for dynamic_refcount semantics. (goacc_enter_data_internal): Implement "present" case of dynamic memory-map handling here. Update "non-present" case for dynamic_refcount semantics. (goacc_exit_data_internal): Update for dynamic_refcount semantics. * target.c (gomp_map_vars_internal): Remove GOMP_MAP_VARS_OPENACC_ENTER_DATA handling. Update for dynamic_refcount handling. (gomp_unmap_vars_internal): Remove virtual_refcount handling. (gomp_load_image_to_device): Substitute dynamic_refcount for virtual_refcount. libgomp/ * testsuite/libgomp.oacc-c-c++-common/refcounting-1.c: New test. * testsuite/libgomp.oacc-c-c++-common/refcounting-2.c: New test. * testsuite/libgomp.oacc-fortran/deep-copy-6.f90: Remove XFAILs. --- libgomp/libgomp.h | 8 +- libgomp/oacc-mem.c | 155 ++++++++++++++---- libgomp/target.c | 38 +---- .../libgomp.oacc-c-c++-common/refcounting-1.c | 31 ++++ .../libgomp.oacc-c-c++-common/refcounting-2.c | 31 ++++ .../libgomp.oacc-fortran/deep-copy-6.f90 | 6 +- 6 files changed, 201 insertions(+), 68 deletions(-) create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/refcounting-1.c create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/refcounting-2.c diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h index ca42e0de640..7b52ce7d5c2 100644 --- a/libgomp/libgomp.h +++ b/libgomp/libgomp.h @@ -1016,11 +1016,8 @@ struct splay_tree_key_s { uintptr_t tgt_offset; /* Reference count. */ uintptr_t refcount; - /* Reference counts beyond those that represent genuine references in the - linked splay tree key/target memory structures, e.g. for multiple OpenACC - "present increment" operations (via "acc enter data") referring to the same - host-memory block. */ - uintptr_t virtual_refcount; + /* Dynamic reference count. */ + uintptr_t dynamic_refcount; struct splay_tree_aux *aux; }; @@ -1153,7 +1150,6 @@ struct gomp_device_descr enum gomp_map_vars_kind { GOMP_MAP_VARS_OPENACC, - GOMP_MAP_VARS_OPENACC_ENTER_DATA, GOMP_MAP_VARS_TARGET, GOMP_MAP_VARS_DATA, GOMP_MAP_VARS_ENTER_DATA diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c index be7f8d600eb..bc64bebe6c1 100644 --- a/libgomp/oacc-mem.c +++ b/libgomp/oacc-mem.c @@ -409,7 +409,7 @@ acc_map_data (void *h, void *d, size_t s) splay_tree_key n = tgt->list[0].key; assert (n); assert (n->refcount == 1); - assert (n->virtual_refcount == 0); + assert (n->dynamic_refcount == 0); /* Special reference counting behavior. */ n->refcount = REFCOUNT_INFINITY; @@ -456,7 +456,7 @@ acc_unmap_data (void *h) (void *) n->host_start, (int) host_size, (void *) h); } /* TODO This currently doesn't catch 'REFCOUNT_INFINITY' usage different from - 'acc_map_data'. Maybe 'virtual_refcount' can be used for disambiguating + 'acc_map_data'. Maybe 'dynamic_refcount' can be used for disambiguating the different 'REFCOUNT_INFINITY' cases, or simply separate 'REFCOUNT_INFINITY' values per different usage ('REFCOUNT_ACC_MAP_DATA' etc.)? */ @@ -545,10 +545,8 @@ goacc_enter_datum (void **hostaddrs, size_t *sizes, void *kinds, int async) assert (n->refcount != REFCOUNT_LINK); if (n->refcount != REFCOUNT_INFINITY) - { - n->refcount++; - n->virtual_refcount++; - } + n->refcount++; + n->dynamic_refcount++; gomp_mutex_unlock (&acc_dev->lock); } @@ -562,13 +560,14 @@ goacc_enter_datum (void **hostaddrs, size_t *sizes, void *kinds, int async) struct target_mem_desc *tgt = gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs, NULL, sizes, - kinds, true, GOMP_MAP_VARS_OPENACC_ENTER_DATA); + kinds, true, GOMP_MAP_VARS_ENTER_DATA); assert (tgt); assert (tgt->list_count == 1); n = tgt->list[0].key; assert (n); assert (n->refcount == 1); - assert (n->virtual_refcount == 0); + assert (n->dynamic_refcount == 0); + n->dynamic_refcount++; d = (void *) tgt->tgt_start; } @@ -689,23 +688,28 @@ goacc_exit_datum (void *h, size_t s, unsigned short kind, int async) (void *) h, (int) s, (void *) n->host_start, (int) host_size); } + assert (n->refcount != REFCOUNT_LINK); + if (n->refcount != REFCOUNT_INFINITY + && n->refcount < n->dynamic_refcount) + { + gomp_mutex_unlock (&acc_dev->lock); + gomp_fatal ("Dynamic reference counting assert fail\n"); + } + bool finalize = (kind == GOMP_MAP_DELETE || kind == GOMP_MAP_FORCE_FROM); if (finalize) { if (n->refcount != REFCOUNT_INFINITY) - n->refcount -= n->virtual_refcount; - n->virtual_refcount = 0; + n->refcount -= n->dynamic_refcount; + n->dynamic_refcount = 0; } - - if (n->virtual_refcount > 0) + else if (n->dynamic_refcount) { if (n->refcount != REFCOUNT_INFINITY) n->refcount--; - n->virtual_refcount--; + n->dynamic_refcount--; } - else if (n->refcount > 0 && n->refcount != REFCOUNT_INFINITY) - n->refcount--; if (n->refcount == 0) { @@ -1048,13 +1052,111 @@ goacc_enter_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum, { for (size_t i = 0; i < mapnum; i++) { - int group_last = find_group_last (i, mapnum, sizes, kinds); + splay_tree_key n; + size_t group_last = find_group_last (i, mapnum, sizes, kinds); + bool struct_p = false; + size_t size, groupnum = (group_last - i) + 1; + + switch (kinds[i] & 0xff) + { + case GOMP_MAP_STRUCT: + { + int last = i + sizes[i]; + size = (uintptr_t) hostaddrs[last] + sizes[last] + - (uintptr_t) hostaddrs[i]; + struct_p = true; + } + break; + + case GOMP_MAP_ATTACH: + size = sizeof (void *); + break; + + default: + size = sizes[i]; + } + + n = lookup_host (acc_dev, hostaddrs[i], size); + + if (n && struct_p) + { + if (n->refcount != REFCOUNT_INFINITY) + n->refcount += groupnum - 1; + n->dynamic_refcount += groupnum - 1; + gomp_mutex_unlock (&acc_dev->lock); + } + else if (n && groupnum == 1) + { + void *h = hostaddrs[i]; + size_t s = sizes[i]; + + /* A standalone attach clause. */ + if ((kinds[i] & 0xff) == GOMP_MAP_ATTACH) + gomp_attach_pointer (acc_dev, aq, &acc_dev->mem_map, n, + (uintptr_t) h, s, NULL); + else if (h + s > (void *) n->host_end) + { + gomp_mutex_unlock (&acc_dev->lock); + gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s); + } + + assert (n->refcount != REFCOUNT_LINK); + if (n->refcount != REFCOUNT_INFINITY) + n->refcount++; + n->dynamic_refcount++; - gomp_map_vars_async (acc_dev, aq, - (group_last - i) + 1, - &hostaddrs[i], NULL, - &sizes[i], &kinds[i], true, - GOMP_MAP_VARS_OPENACC_ENTER_DATA); + gomp_mutex_unlock (&acc_dev->lock); + } + else if (n && groupnum > 1) + { + assert (n->refcount != REFCOUNT_INFINITY + && n->refcount != REFCOUNT_LINK); + + for (size_t j = i + 1; j <= group_last; j++) + if ((kinds[j] & 0xff) == GOMP_MAP_ATTACH) + { + splay_tree_key m + = lookup_host (acc_dev, hostaddrs[j], sizeof (void *)); + gomp_attach_pointer (acc_dev, aq, &acc_dev->mem_map, m, + (uintptr_t) hostaddrs[j], sizes[j], NULL); + } + + bool processed = false; + + struct target_mem_desc *tgt = n->tgt; + for (size_t j = 0; j < tgt->list_count; j++) + if (tgt->list[j].key == n) + { + for (size_t k = 0; k < groupnum; k++) + if (j + k < tgt->list_count && tgt->list[j + k].key) + { + tgt->list[j + k].key->refcount++; + tgt->list[j + k].key->dynamic_refcount++; + } + processed = true; + } + + gomp_mutex_unlock (&acc_dev->lock); + if (!processed) + gomp_fatal ("dynamic refcount incrementing failed for " + "pointer/pset"); + } + else if (hostaddrs[i]) + { + gomp_mutex_unlock (&acc_dev->lock); + + struct target_mem_desc *tgt + = gomp_map_vars_async (acc_dev, aq, groupnum, &hostaddrs[i], NULL, + &sizes[i], &kinds[i], true, + GOMP_MAP_VARS_ENTER_DATA); + assert (tgt); + for (size_t j = 0; j < tgt->list_count; j++) + { + n = tgt->list[j].key; + if (n) + n->dynamic_refcount++; + } + } i = group_last; } @@ -1148,18 +1250,15 @@ goacc_exit_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum, if (finalize) { if (n->refcount != REFCOUNT_INFINITY) - n->refcount -= n->virtual_refcount; - n->virtual_refcount = 0; + n->refcount -= n->dynamic_refcount; + n->dynamic_refcount = 0; } - - if (n->virtual_refcount > 0) + else if (n->dynamic_refcount) { if (n->refcount != REFCOUNT_INFINITY) n->refcount--; - n->virtual_refcount--; + n->dynamic_refcount--; } - else if (n->refcount > 0 && n->refcount != REFCOUNT_INFINITY) - n->refcount--; if (copyfrom && (kind != GOMP_MAP_FROM || n->refcount == 0)) diff --git a/libgomp/target.c b/libgomp/target.c index 36425477dcb..3f2becdae0e 100644 --- a/libgomp/target.c +++ b/libgomp/target.c @@ -666,8 +666,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum); tgt->list_count = mapnum; - tgt->refcount = (pragma_kind == GOMP_MAP_VARS_ENTER_DATA - || pragma_kind == GOMP_MAP_VARS_OPENACC_ENTER_DATA) ? 0 : 1; + tgt->refcount = pragma_kind == GOMP_MAP_VARS_ENTER_DATA ? 0 : 1; tgt->device_descr = devicep; tgt->prev = NULL; struct gomp_coalesce_buf cbuf, *cbufp = NULL; @@ -1094,7 +1093,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, tgt->list[i].copy_from = false; tgt->list[i].always_copy_from = false; tgt->list[i].do_detach - = (pragma_kind != GOMP_MAP_VARS_OPENACC_ENTER_DATA); + = (pragma_kind != GOMP_MAP_VARS_ENTER_DATA); n->refcount++; } else @@ -1155,7 +1154,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, tgt->list[i].offset = 0; tgt->list[i].length = k->host_end - k->host_start; k->refcount = 1; - k->virtual_refcount = 0; + k->dynamic_refcount = 0; tgt->refcount++; array->left = NULL; array->right = NULL; @@ -1294,20 +1293,8 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, /* If the variable from "omp target enter data" map-list was already mapped, tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or gomp_exit_data. */ - if ((pragma_kind == GOMP_MAP_VARS_ENTER_DATA - || pragma_kind == GOMP_MAP_VARS_OPENACC_ENTER_DATA) - && tgt->refcount == 0) - { - /* If we're about to discard a target_mem_desc with no "structural" - references (tgt->refcount == 0), any splay keys linked in the tgt's - list must have their virtual refcount incremented to represent that - "lost" reference in order to implement the semantics of the OpenACC - "present increment" operation properly. */ - if (pragma_kind == GOMP_MAP_VARS_OPENACC_ENTER_DATA) - for (i = 0; i < tgt->list_count; i++) - if (tgt->list[i].key) - tgt->list[i].key->virtual_refcount++; - + if (pragma_kind == GOMP_MAP_VARS_ENTER_DATA && tgt->refcount == 0) + { free (tgt); tgt = NULL; } @@ -1459,14 +1446,7 @@ gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom, continue; bool do_unmap = false; - if (k->tgt == tgt - && k->virtual_refcount > 0 - && k->refcount != REFCOUNT_INFINITY) - { - k->virtual_refcount--; - k->refcount--; - } - else if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY) + if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY) k->refcount--; else if (k->refcount == 1) { @@ -1631,7 +1611,7 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version, k->tgt = tgt; k->tgt_offset = target_table[i].start; k->refcount = REFCOUNT_INFINITY; - k->virtual_refcount = 0; + k->dynamic_refcount = 0; k->aux = NULL; array->left = NULL; array->right = NULL; @@ -1665,7 +1645,7 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version, k->tgt = tgt; k->tgt_offset = target_var->start; k->refcount = is_link_var ? REFCOUNT_LINK : REFCOUNT_INFINITY; - k->virtual_refcount = 0; + k->dynamic_refcount = 0; k->aux = NULL; array->left = NULL; array->right = NULL; @@ -2935,7 +2915,7 @@ omp_target_associate_ptr (const void *host_ptr, const void *device_ptr, k->tgt = tgt; k->tgt_offset = (uintptr_t) device_ptr + device_offset; k->refcount = REFCOUNT_INFINITY; - k->virtual_refcount = 0; + k->dynamic_refcount = 0; k->aux = NULL; array->left = NULL; array->right = NULL; diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/refcounting-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/refcounting-1.c new file mode 100644 index 00000000000..4e6d06d48d5 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/refcounting-1.c @@ -0,0 +1,31 @@ +/* Test dynamic unmapping of separate structure members. */ + +/* { dg-skip-if "" { *-*-* } { "-DACC_MEM_SHARED=1" } } */ + +#include <assert.h> +#include <openacc.h> + +struct s +{ + char a; + char b; +}; + +int main () +{ + struct s s; + +#pragma acc enter data create(s.a, s.b) + + assert (acc_is_present (&s.a, sizeof s.a)); + assert (acc_is_present (&s.b, sizeof s.b)); + +#pragma acc exit data delete(s.a) +#pragma acc exit data delete(s.b) + + assert (!acc_is_present (&s.a, sizeof s.a)); + assert (!acc_is_present (&s.b, sizeof s.b)); + + return 0; +} + diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/refcounting-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/refcounting-2.c new file mode 100644 index 00000000000..5539fd8d57f --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/refcounting-2.c @@ -0,0 +1,31 @@ +/* Test dynamic unmapping of separate structure members. */ + +/* { dg-skip-if "" { *-*-* } { "-DACC_MEM_SHARED=1" } } */ + +#include <assert.h> +#include <openacc.h> + +struct s +{ + char a; + char b; +}; + +int main () +{ + struct s s; + +#pragma acc enter data create(s.a, s.b) + + assert (acc_is_present (&s.a, sizeof s.a)); + assert (acc_is_present (&s.b, sizeof s.b)); + + acc_delete (&s.a, sizeof s.a); + acc_delete (&s.b, sizeof s.b); + + assert (!acc_is_present (&s.a, sizeof s.a)); + assert (!acc_is_present (&s.b, sizeof s.b)); + + return 0; +} + diff --git a/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-6.f90 b/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-6.f90 index 5837a403910..eb7d3ca160e 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-6.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-6.f90 @@ -43,12 +43,8 @@ program dtype print *, "CheCKpOInT1" ! { dg-output ".*CheCKpOInT1(\n|\r\n|\r)" } !$acc exit data copyout(var%a(5:n - 5), var%b(5:n - 5)) finalize - !TODO goacc_exit_data_internal: Assertion `is_tgt_unmapped || num_mappings > 1' failed. - !TODO { dg-output ".*\[Aa\]ssert.*is_tgt_unmapped" { target { ! openacc_host_selected } } } ! Scan for what we expect in the "XFAILed" case (without actually XFAILing). - !TODO { dg-shouldfail "XFAILed" { ! openacc_host_selected } } ! ... instead of 'dg-xfail-run-if' so that 'dg-output' is evaluated at all. - !TODO { dg-final { if { [dg-process-target { xfail { ! openacc_host_selected } }] == "F" } { xfail "[testname-for-summary] really is XFAILed" } } } ! ... so that we still get an XFAIL visible in the log. print *, "CheCKpOInT2" - ! { dg-output ".CheCKpOInT2(\n|\r\n|\r)" { target { openacc_host_selected } } } + ! { dg-output ".*CheCKpOInT2(\n|\r\n|\r)" } if (acc_get_device_type() .ne. acc_device_host) then if (acc_is_present(var%a(5:n - 5))) stop 21 if (acc_is_present(var%b(5:n - 5))) stop 22 -- 2.23.0