This patch adjusts the semantics of dynamic reference counts, as described in the parent email. There are also two new test cases derived from Thomas's test in the email:
https://gcc.gnu.org/pipermail/gcc-patches/2020-May/546166.html that work now. OK? Julian ChangeLog libgomp/ * libgomp.h (struct splay_tree_key_s): Change virtual_refcount to dynamic_refcount. (struct gomp_device_descr): Remove GOMP_MAP_VARS_OPENACC_ENTER_DATA. * oacc-mem.c (acc_map_data): Substitute virtual_refcount for dynamic_refcount. (acc_unmap_data): Replace open-coded refcount handling with call to gomp_remove_var. (goacc_enter_datum): Adjust for dynamic_refcount semantics. Use tgt returned from gomp_map_vars_async. Update assertions. (goacc_exit_datum): Re-add some error checking. Adjust for dynamic_refcount semantics. Fix is_tgt_unmapped test for struct mappings. (goacc_enter_data_internal): Implement "present" case of dynamic memory-map handling here. Update "non-present" case for dynamic_refcount semantics. (goacc_exit_data_internal): Update for dynamic_refcount semantics. Re-introduce error checking for tgt unmapping when appropriate. * target.c (gomp_map_vars_internal): Remove GOMP_MAP_VARS_OPENACC_ENTER_DATA handling. Update for dynamic_refcount handling. (gomp_unmap_vars_internal): Remove virtual_refcount handling. (gomp_load_image_to_device): Substitute dynamic_refcount for virtual_refcount. libgomp/ * testsuite/libgomp.oacc-c-c++-common/refcounting-1.c: New test. * testsuite/libgomp.oacc-c-c++-common/refcounting-2.c: New test. --- libgomp/libgomp.h | 8 +- libgomp/oacc-mem.c | 241 ++++++++++++------ libgomp/target.c | 38 +-- .../libgomp.oacc-c-c++-common/refcounting-1.c | 31 +++ .../libgomp.oacc-c-c++-common/refcounting-2.c | 31 +++ 5 files changed, 243 insertions(+), 106 deletions(-) create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/refcounting-1.c create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/refcounting-2.c diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h index ca42e0de640..7b52ce7d5c2 100644 --- a/libgomp/libgomp.h +++ b/libgomp/libgomp.h @@ -1016,11 +1016,8 @@ struct splay_tree_key_s { uintptr_t tgt_offset; /* Reference count. */ uintptr_t refcount; - /* Reference counts beyond those that represent genuine references in the - linked splay tree key/target memory structures, e.g. for multiple OpenACC - "present increment" operations (via "acc enter data") referring to the same - host-memory block. */ - uintptr_t virtual_refcount; + /* Dynamic reference count. */ + uintptr_t dynamic_refcount; struct splay_tree_aux *aux; }; @@ -1153,7 +1150,6 @@ struct gomp_device_descr enum gomp_map_vars_kind { GOMP_MAP_VARS_OPENACC, - GOMP_MAP_VARS_OPENACC_ENTER_DATA, GOMP_MAP_VARS_TARGET, GOMP_MAP_VARS_DATA, GOMP_MAP_VARS_ENTER_DATA diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c index c06b7341cbb..fff0d573f59 100644 --- a/libgomp/oacc-mem.c +++ b/libgomp/oacc-mem.c @@ -407,7 +407,7 @@ acc_map_data (void *h, void *d, size_t s) assert (tgt); splay_tree_key n = tgt->list[0].key; assert (n->refcount == 1); - assert (n->virtual_refcount == 0); + assert (n->dynamic_refcount == 0); /* Special reference counting behavior. */ n->refcount = REFCOUNT_INFINITY; @@ -454,7 +454,7 @@ acc_unmap_data (void *h) (void *) n->host_start, (int) host_size, (void *) h); } /* TODO This currently doesn't catch 'REFCOUNT_INFINITY' usage different from - 'acc_map_data'. Maybe 'virtual_refcount' can be used for disambiguating + 'acc_map_data'. Maybe 'dynamic_refcount' can be used for disambiguating the different 'REFCOUNT_INFINITY' cases, or simply separate 'REFCOUNT_INFINITY' values per different usage ('REFCOUNT_ACC_MAP_DATA' etc.)? */ @@ -475,14 +475,19 @@ acc_unmap_data (void *h) gomp_mutex_unlock (&acc_dev->lock); gomp_fatal ("cannot unmap target block"); } - else if (tgt->refcount > 1) - tgt->refcount--; - else + + if (tgt->refcount == 1) { - free (tgt->array); - free (tgt); + /* This is the last reference. Nullifying these fields prevents + 'gomp_unmap_tgt' via 'gomp_remove_var' from freeing the target + memory. */ + tgt->tgt_end = 0; + tgt->to_free = NULL; } + bool is_tgt_unmapped = gomp_remove_var (acc_dev, n); + assert (is_tgt_unmapped); + gomp_mutex_unlock (&acc_dev->lock); if (profiling_p) @@ -540,10 +545,8 @@ goacc_enter_datum (void **hostaddrs, size_t *sizes, void *kinds, int async) assert (n->refcount != REFCOUNT_LINK); if (n->refcount != REFCOUNT_INFINITY) - { - n->refcount++; - n->virtual_refcount++; - } + n->refcount++; + n->dynamic_refcount++; gomp_mutex_unlock (&acc_dev->lock); } @@ -555,16 +558,18 @@ goacc_enter_datum (void **hostaddrs, size_t *sizes, void *kinds, int async) goacc_aq aq = get_goacc_asyncqueue (async); - gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs, NULL, sizes, kinds, - true, GOMP_MAP_VARS_OPENACC_ENTER_DATA); + struct target_mem_desc *tgt + = gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs, NULL, sizes, + kinds, true, GOMP_MAP_VARS_ENTER_DATA); + assert (tgt); + assert (tgt->list_count == 1); + n = tgt->list[0].key; + assert (n); + assert (n->refcount == 1); + assert (n->dynamic_refcount == 0); + n->dynamic_refcount++; - gomp_mutex_lock (&acc_dev->lock); - n = lookup_host (acc_dev, hostaddrs[0], sizes[0]); - assert (n != NULL); - assert (n->tgt_offset == 0); - assert ((uintptr_t) hostaddrs[0] == n->host_start); - d = (void *) n->tgt->tgt_start; - gomp_mutex_unlock (&acc_dev->lock); + d = (void *) tgt->tgt_start; } if (profiling_p) @@ -683,23 +688,28 @@ goacc_exit_datum (void *h, size_t s, unsigned short kind, int async) (void *) h, (int) s, (void *) n->host_start, (int) host_size); } + assert (n->refcount != REFCOUNT_LINK); + if (n->refcount != REFCOUNT_INFINITY + && n->refcount < n->dynamic_refcount) + { + gomp_mutex_unlock (&acc_dev->lock); + gomp_fatal ("Dynamic reference counting assert fail\n"); + } + bool finalize = (kind == GOMP_MAP_DELETE || kind == GOMP_MAP_FORCE_FROM); if (finalize) { if (n->refcount != REFCOUNT_INFINITY) - n->refcount -= n->virtual_refcount; - n->virtual_refcount = 0; + n->refcount -= n->dynamic_refcount; + n->dynamic_refcount = 0; } - - if (n->virtual_refcount > 0) + else if (n->dynamic_refcount) { if (n->refcount != REFCOUNT_INFINITY) n->refcount--; - n->virtual_refcount--; + n->dynamic_refcount--; } - else if (n->refcount > 0 && n->refcount != REFCOUNT_INFINITY) - n->refcount--; if (n->refcount == 0) { @@ -722,8 +732,16 @@ goacc_exit_datum (void *h, size_t s, unsigned short kind, int async) gomp_remove_var_async (acc_dev, n, aq); else { + int num_mappings = 0; + /* If the target_mem_desc represents a single data mapping, we can + check that it is freed when this splay tree key's refcount + reaches zero. Otherwise (e.g. for a struct mapping with multiple + members), fall back to skipping the test. */ + for (int i = 0; i < n->tgt->list_count; i++) + if (n->tgt->list[i].key) + num_mappings++; bool is_tgt_unmapped = gomp_remove_var (acc_dev, n); - assert (is_tgt_unmapped); + assert (num_mappings > 1 || is_tgt_unmapped); } } @@ -1018,13 +1036,102 @@ goacc_enter_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum, { for (size_t i = 0; i < mapnum; i++) { - int group_last = find_group_last (i, mapnum, sizes, kinds); + splay_tree_key n; + size_t group_last = find_group_last (i, mapnum, sizes, kinds); + bool struct_p = false; + size_t size, groupnum = (group_last - i) + 1; - gomp_map_vars_async (acc_dev, aq, - (group_last - i) + 1, - &hostaddrs[i], NULL, - &sizes[i], &kinds[i], true, - GOMP_MAP_VARS_OPENACC_ENTER_DATA); + switch (kinds[i] & 0xff) + { + case GOMP_MAP_STRUCT: + { + int last = i + sizes[i]; + size = (uintptr_t) hostaddrs[last] + sizes[last] + - (uintptr_t) hostaddrs[i]; + struct_p = true; + } + break; + + case GOMP_MAP_ATTACH: + size = sizeof (void *); + break; + + default: + size = sizes[i]; + } + + n = lookup_host (acc_dev, hostaddrs[i], size); + + if (n && struct_p) + { + if (n->refcount != REFCOUNT_INFINITY) + n->refcount += groupnum - 1; + n->dynamic_refcount += groupnum - 1; + gomp_mutex_unlock (&acc_dev->lock); + } + else if (n && groupnum == 1) + { + void *h = hostaddrs[i]; + size_t s = sizes[i]; + + /* A standalone attach clause. */ + if ((kinds[i] & 0xff) == GOMP_MAP_ATTACH) + gomp_attach_pointer (acc_dev, aq, &acc_dev->mem_map, n, + (uintptr_t) h, s, NULL); + else if (h + s > (void *) n->host_end) + { + gomp_mutex_unlock (&acc_dev->lock); + gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s); + } + + assert (n->refcount != REFCOUNT_LINK); + if (n->refcount != REFCOUNT_INFINITY) + n->refcount++; + n->dynamic_refcount++; + + gomp_mutex_unlock (&acc_dev->lock); + } + else if (n && groupnum > 1) + { + assert (n->refcount != REFCOUNT_INFINITY + && n->refcount != REFCOUNT_LINK); + + bool processed = false; + + struct target_mem_desc *tgt = n->tgt; + for (size_t j = 0; j < tgt->list_count; j++) + if (tgt->list[j].key == n) + { + for (size_t k = 0; k < groupnum; k++) + if (j + k < tgt->list_count && tgt->list[j + k].key) + { + tgt->list[j + k].key->refcount++; + tgt->list[j + k].key->dynamic_refcount++; + } + processed = true; + } + + gomp_mutex_unlock (&acc_dev->lock); + if (!processed) + gomp_fatal ("dynamic refcount incrementing failed for " + "pointer/pset"); + } + else if (hostaddrs[i]) + { + gomp_mutex_unlock (&acc_dev->lock); + + struct target_mem_desc *tgt + = gomp_map_vars_async (acc_dev, aq, groupnum, &hostaddrs[i], NULL, + &sizes[i], &kinds[i], true, + GOMP_MAP_VARS_ENTER_DATA); + assert (tgt); + for (size_t j = 0; j < tgt->list_count; j++) + { + n = tgt->list[j].key; + if (n) + n->dynamic_refcount++; + } + } i = group_last; } @@ -1115,18 +1222,15 @@ goacc_exit_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum, if (finalize) { if (n->refcount != REFCOUNT_INFINITY) - n->refcount -= n->virtual_refcount; - n->virtual_refcount = 0; + n->refcount -= n->dynamic_refcount; + n->dynamic_refcount = 0; } - - if (n->virtual_refcount > 0) + else if (n->dynamic_refcount) { if (n->refcount != REFCOUNT_INFINITY) n->refcount--; - n->virtual_refcount--; + n->dynamic_refcount--; } - else if (n->refcount > 0 && n->refcount != REFCOUNT_INFINITY) - n->refcount--; if (copyfrom && (kind != GOMP_MAP_FROM || n->refcount == 0)) @@ -1137,45 +1241,40 @@ goacc_exit_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum, cur_node.host_end - cur_node.host_start); if (n->refcount == 0) - gomp_remove_var_async (acc_dev, n, aq); - } - break; - - case GOMP_MAP_STRUCT: - { - int elems = sizes[i]; - for (int j = 1; j <= elems; j++) { - struct splay_tree_key_s k; - k.host_start = (uintptr_t) hostaddrs[i + j]; - k.host_end = k.host_start + sizes[i + j]; - splay_tree_key str; - str = splay_tree_lookup (&acc_dev->mem_map, &k); - if (str) + if (aq) { - if (finalize) - { - if (str->refcount != REFCOUNT_INFINITY) - str->refcount -= str->virtual_refcount; - str->virtual_refcount = 0; - } - if (str->virtual_refcount > 0) + /* TODO The way the following code is currently + implemented, we need the 'is_tgt_unmapped' return + value from 'gomp_remove_var', so can't use + 'gomp_remove_var_async' here -- see the + 'gomp_unref_tgt' comment in + <http://mid.mail-archive.com/878snl36eu.fsf@euler.schwinge.homeip.net>; + PR92881 -- so have to synchronize here. */ + if (!acc_dev->openacc.async.synchronize_func (aq)) { - if (str->refcount != REFCOUNT_INFINITY) - str->refcount--; - str->virtual_refcount--; + gomp_mutex_unlock (&acc_dev->lock); + gomp_fatal ("synchronize failed"); } - else if (str->refcount > 0 - && str->refcount != REFCOUNT_INFINITY) - str->refcount--; - if (str->refcount == 0) - gomp_remove_var_async (acc_dev, str, aq); } + int num_mappings = 0; + /* If the target_mem_desc represents a single data mapping, we + can check that it is freed when this splay tree key's + refcount reaches zero. Otherwise (e.g. for a struct + mapping with multiple members), fall back to skipping the + test. */ + for (int j = 0; j < n->tgt->list_count; j++) + if (n->tgt->list[j].key) + num_mappings++; + bool is_tgt_unmapped = gomp_remove_var (acc_dev, n); + assert (num_mappings > 1 || is_tgt_unmapped); } - i += elems; } break; + case GOMP_MAP_STRUCT: + continue; + default: gomp_fatal (">>>> goacc_exit_data_internal UNHANDLED kind 0x%.2x", kind); diff --git a/libgomp/target.c b/libgomp/target.c index 36425477dcb..3f2becdae0e 100644 --- a/libgomp/target.c +++ b/libgomp/target.c @@ -666,8 +666,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum); tgt->list_count = mapnum; - tgt->refcount = (pragma_kind == GOMP_MAP_VARS_ENTER_DATA - || pragma_kind == GOMP_MAP_VARS_OPENACC_ENTER_DATA) ? 0 : 1; + tgt->refcount = pragma_kind == GOMP_MAP_VARS_ENTER_DATA ? 0 : 1; tgt->device_descr = devicep; tgt->prev = NULL; struct gomp_coalesce_buf cbuf, *cbufp = NULL; @@ -1094,7 +1093,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, tgt->list[i].copy_from = false; tgt->list[i].always_copy_from = false; tgt->list[i].do_detach - = (pragma_kind != GOMP_MAP_VARS_OPENACC_ENTER_DATA); + = (pragma_kind != GOMP_MAP_VARS_ENTER_DATA); n->refcount++; } else @@ -1155,7 +1154,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, tgt->list[i].offset = 0; tgt->list[i].length = k->host_end - k->host_start; k->refcount = 1; - k->virtual_refcount = 0; + k->dynamic_refcount = 0; tgt->refcount++; array->left = NULL; array->right = NULL; @@ -1294,20 +1293,8 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, /* If the variable from "omp target enter data" map-list was already mapped, tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or gomp_exit_data. */ - if ((pragma_kind == GOMP_MAP_VARS_ENTER_DATA - || pragma_kind == GOMP_MAP_VARS_OPENACC_ENTER_DATA) - && tgt->refcount == 0) - { - /* If we're about to discard a target_mem_desc with no "structural" - references (tgt->refcount == 0), any splay keys linked in the tgt's - list must have their virtual refcount incremented to represent that - "lost" reference in order to implement the semantics of the OpenACC - "present increment" operation properly. */ - if (pragma_kind == GOMP_MAP_VARS_OPENACC_ENTER_DATA) - for (i = 0; i < tgt->list_count; i++) - if (tgt->list[i].key) - tgt->list[i].key->virtual_refcount++; - + if (pragma_kind == GOMP_MAP_VARS_ENTER_DATA && tgt->refcount == 0) + { free (tgt); tgt = NULL; } @@ -1459,14 +1446,7 @@ gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom, continue; bool do_unmap = false; - if (k->tgt == tgt - && k->virtual_refcount > 0 - && k->refcount != REFCOUNT_INFINITY) - { - k->virtual_refcount--; - k->refcount--; - } - else if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY) + if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY) k->refcount--; else if (k->refcount == 1) { @@ -1631,7 +1611,7 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version, k->tgt = tgt; k->tgt_offset = target_table[i].start; k->refcount = REFCOUNT_INFINITY; - k->virtual_refcount = 0; + k->dynamic_refcount = 0; k->aux = NULL; array->left = NULL; array->right = NULL; @@ -1665,7 +1645,7 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version, k->tgt = tgt; k->tgt_offset = target_var->start; k->refcount = is_link_var ? REFCOUNT_LINK : REFCOUNT_INFINITY; - k->virtual_refcount = 0; + k->dynamic_refcount = 0; k->aux = NULL; array->left = NULL; array->right = NULL; @@ -2935,7 +2915,7 @@ omp_target_associate_ptr (const void *host_ptr, const void *device_ptr, k->tgt = tgt; k->tgt_offset = (uintptr_t) device_ptr + device_offset; k->refcount = REFCOUNT_INFINITY; - k->virtual_refcount = 0; + k->dynamic_refcount = 0; k->aux = NULL; array->left = NULL; array->right = NULL; diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/refcounting-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/refcounting-1.c new file mode 100644 index 00000000000..4e6d06d48d5 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/refcounting-1.c @@ -0,0 +1,31 @@ +/* Test dynamic unmapping of separate structure members. */ + +/* { dg-skip-if "" { *-*-* } { "-DACC_MEM_SHARED=1" } } */ + +#include <assert.h> +#include <openacc.h> + +struct s +{ + char a; + char b; +}; + +int main () +{ + struct s s; + +#pragma acc enter data create(s.a, s.b) + + assert (acc_is_present (&s.a, sizeof s.a)); + assert (acc_is_present (&s.b, sizeof s.b)); + +#pragma acc exit data delete(s.a) +#pragma acc exit data delete(s.b) + + assert (!acc_is_present (&s.a, sizeof s.a)); + assert (!acc_is_present (&s.b, sizeof s.b)); + + return 0; +} + diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/refcounting-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/refcounting-2.c new file mode 100644 index 00000000000..5539fd8d57f --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/refcounting-2.c @@ -0,0 +1,31 @@ +/* Test dynamic unmapping of separate structure members. */ + +/* { dg-skip-if "" { *-*-* } { "-DACC_MEM_SHARED=1" } } */ + +#include <assert.h> +#include <openacc.h> + +struct s +{ + char a; + char b; +}; + +int main () +{ + struct s s; + +#pragma acc enter data create(s.a, s.b) + + assert (acc_is_present (&s.a, sizeof s.a)); + assert (acc_is_present (&s.b, sizeof s.b)); + + acc_delete (&s.a, sizeof s.a); + acc_delete (&s.b, sizeof s.b); + + assert (!acc_is_present (&s.a, sizeof s.a)); + assert (!acc_is_present (&s.b, sizeof s.b)); + + return 0; +} + -- 2.23.0