Hi Julian! On 2020-06-03T14:36:14+0200, I wrote: > On 2020-05-22T15:16:05-0700, Julian Brown <jul...@codesourcery.com> wrote: >> This patch adjusts the semantics of dynamic reference counts, as described >> in the parent email. > > Thanks! > > A few questions, but no need to send an updated patch. > >> --- a/libgomp/oacc-mem.c >> +++ b/libgomp/oacc-mem.c > >> @@ -1018,13 +1036,102 @@ goacc_enter_data_internal (struct gomp_device_descr >> *acc_dev, size_t mapnum, >> { >> for (size_t i = 0; i < mapnum; i++) >> { >> - int group_last = find_group_last (i, mapnum, sizes, kinds); >> + splay_tree_key n; >> + size_t group_last = find_group_last (i, mapnum, sizes, kinds); >> + bool struct_p = false; >> + size_t size, groupnum = (group_last - i) + 1; >> >> - gomp_map_vars_async (acc_dev, aq, >> - (group_last - i) + 1, >> - &hostaddrs[i], NULL, >> - &sizes[i], &kinds[i], true, >> - GOMP_MAP_VARS_OPENACC_ENTER_DATA); >> + switch (kinds[i] & 0xff) >> + { >> + case GOMP_MAP_STRUCT: >> + { >> + int last = i + sizes[i]; > > The 'last' calculated here must always equal the 'group_last' calculated > above. ;-) (... so we might just use 'group_last' instead of 'last' in > the following.) > >> + size = (uintptr_t) hostaddrs[last] + sizes[last] >> + - (uintptr_t) hostaddrs[i]; >> + struct_p = true; >> + } >> + break; >> + >> + case GOMP_MAP_ATTACH: >> + size = sizeof (void *); >> + break; >> + >> + default: >> + size = sizes[i]; >> + } >> + >> + n = lookup_host (acc_dev, hostaddrs[i], size); >> + > >> + if (n && struct_p) >> + { >> + if (n->refcount != REFCOUNT_INFINITY) >> + n->refcount += groupnum - 1; >> + n->dynamic_refcount += groupnum - 1; >> + gomp_mutex_unlock (&acc_dev->lock); >> + } > > Is the 'GOMP_MAP_STRUCT' handling here specifically necessary, or is that > just an optimization of the 'n && groupnum > 1' case below?
Eh, OK, I think I see where this is going; the 'n && groupnum > 1' case below might not necessarily take care of the 'groupnum - 1' refcounts that we're filing here? >> + else if (n && groupnum == 1) >> + { >> + void *h = hostaddrs[i]; >> + size_t s = sizes[i]; >> + >> + /* A standalone attach clause. */ >> + if ((kinds[i] & 0xff) == GOMP_MAP_ATTACH) >> + gomp_attach_pointer (acc_dev, aq, &acc_dev->mem_map, n, >> + (uintptr_t) h, s, NULL); >> + else if (h + s > (void *) n->host_end) >> + { >> + gomp_mutex_unlock (&acc_dev->lock); >> + gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s); >> + } >> + >> + assert (n->refcount != REFCOUNT_LINK); >> + if (n->refcount != REFCOUNT_INFINITY) >> + n->refcount++; >> + n->dynamic_refcount++; >> + >> + gomp_mutex_unlock (&acc_dev->lock); >> + } > >> + else if (n && groupnum > 1) >> + { >> + assert (n->refcount != REFCOUNT_INFINITY >> + && n->refcount != REFCOUNT_LINK); >> + >> + bool processed = false; >> + >> + struct target_mem_desc *tgt = n->tgt; >> + for (size_t j = 0; j < tgt->list_count; j++) >> + if (tgt->list[j].key == n) >> + { >> + for (size_t k = 0; k < groupnum; k++) >> + if (j + k < tgt->list_count && tgt->list[j + k].key) >> + { >> + tgt->list[j + k].key->refcount++; >> + tgt->list[j + k].key->dynamic_refcount++; >> + } >> + processed = true; >> + } >> + >> + gomp_mutex_unlock (&acc_dev->lock); >> + if (!processed) >> + gomp_fatal ("dynamic refcount incrementing failed for " >> + "pointer/pset"); >> + } > > Please add some text to explain the nested 'j', 'k' loops and their 'if' > conditionals, and the 'groupnum' usage in the 'k' loop boundary. Should > the 'k' loop maybe run 'for (size_t k = j; k < tgt->list_count; ++k)' > (..., or is 'groupnum' relevant?), and in the loop body then use 'k' > instead of 'j + k'? (Maybe I've now confused myself, staring at this for > a while...) Audacious as I am sometimes, I did put a '__builtin_abort' right after 'tgt->list[j].key == n' -- and it doesn't trigger one single time for the current libgomp test cases, meaning this is all dead code? I'm confused. >> + else if (hostaddrs[i]) >> + { >> + gomp_mutex_unlock (&acc_dev->lock); >> + >> + struct target_mem_desc *tgt >> + = gomp_map_vars_async (acc_dev, aq, groupnum, &hostaddrs[i], NULL, >> + &sizes[i], &kinds[i], true, >> + GOMP_MAP_VARS_ENTER_DATA); >> + assert (tgt); >> + for (size_t j = 0; j < tgt->list_count; j++) >> + { >> + n = tgt->list[j].key; >> + if (n) >> + n->dynamic_refcount++; >> + } >> + } > > ... else nothing. This latter "nothing" case (not present, and no > 'hostaddrs[i]') is exercised by > 'libgomp.oacc-fortran/optional-data-enter-exit.f90' (only). Is that > alright? > >> >> i = group_last; >> } > > >> @@ -1137,45 +1241,40 @@ goacc_exit_data_internal (struct gomp_device_descr >> *acc_dev, size_t mapnum, > > (Diff slightly edited.) > >> if (n->refcount == 0) >> - gomp_remove_var_async (acc_dev, n, aq); > >> + { >> + if (aq) >> + { >> + /* TODO The way the following code is currently >> + implemented, we need the 'is_tgt_unmapped' return >> + value from 'gomp_remove_var', so can't use >> + 'gomp_remove_var_async' here -- see the >> + 'gomp_unref_tgt' comment in >> + >> <http://mid.mail-archive.com/878snl36eu.fsf@euler.schwinge.homeip.net>; >> + PR92881 -- so have to synchronize here. */ >> + if (!acc_dev->openacc.async.synchronize_func (aq)) >> + { >> + gomp_mutex_unlock (&acc_dev->lock); >> + gomp_fatal ("synchronize failed"); >> + } >> + } > > As far as I understand, it's no longer true that "The way the following > code is [...] implemented, we need the 'is_tgt_unmapped' return value > from 'gomp_remove_var'". In particular, we now can/should "use > 'gomp_remove_var_async' here", and no longer "have to synchronize here"? > > Indeed I'm happy to see that the logic below no longer depends on > 'is_tgt_unmapped' for its loop exit condition. Instead of the above, > this now can use the standard pattern: > > if (aq) > /* TODO We can't do the 'is_tgt_unmapped' checking -- see the > 'gomp_unref_tgt' comment in > > <http://mid.mail-archive.com/878snl36eu.fsf@euler.schwinge.homeip.net>; > PR92881. */ > gomp_remove_var_async (acc_dev, n, aq); > else > { [as follows] } > >> + int num_mappings = 0; >> + /* If the target_mem_desc represents a single data mapping, we >> + can check that it is freed when this splay tree key's >> + refcount reaches zero. Otherwise (e.g. for a struct >> + mapping with multiple members), fall back to skipping the >> + test. */ >> + for (int j = 0; j < n->tgt->list_count; j++) >> + if (n->tgt->list[j].key) >> + num_mappings++; >> + bool is_tgt_unmapped = gomp_remove_var (acc_dev, n); >> + assert (num_mappings > 1 || is_tgt_unmapped); >> + } >> } >> break; > > For reference, the old logic (mandating what was described in the comment > above) was: > > bool is_tgt_unmapped = false; > for (size_t i = 0; i < t->list_count; i++) > { > is_tgt_unmapped = gomp_remove_var (acc_dev, t->list[i].key); > if (is_tgt_unmapped) > break; > } > assert (is_tgt_unmapped); Grüße Thomas ----------------- Mentor Graphics (Deutschland) GmbH, Arnulfstraße 201, 80634 München / Germany Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Alexander Walter