Hi Julian!

On 2020-05-22T15:16:05-0700, Julian Brown <jul...@codesourcery.com> wrote:
> This patch adjusts the semantics of dynamic reference counts, as described
> in the parent email.

Thanks!

A few questions, but no need to send an updated patch.

> --- a/libgomp/oacc-mem.c
> +++ b/libgomp/oacc-mem.c

> @@ -1018,13 +1036,102 @@ goacc_enter_data_internal (struct gomp_device_descr 
> *acc_dev, size_t mapnum,
>  {
>    for (size_t i = 0; i < mapnum; i++)
>      {
> -      int group_last = find_group_last (i, mapnum, sizes, kinds);
> +      splay_tree_key n;
> +      size_t group_last = find_group_last (i, mapnum, sizes, kinds);
> +      bool struct_p = false;
> +      size_t size, groupnum = (group_last - i) + 1;
>
> -      gomp_map_vars_async (acc_dev, aq,
> -                        (group_last - i) + 1,
> -                        &hostaddrs[i], NULL,
> -                        &sizes[i], &kinds[i], true,
> -                        GOMP_MAP_VARS_OPENACC_ENTER_DATA);
> +      switch (kinds[i] & 0xff)
> +     {
> +     case GOMP_MAP_STRUCT:
> +       {
> +         int last = i + sizes[i];

The 'last' calculated here must always equal the 'group_last' calculated
above.  ;-) (... so we might just use 'group_last' instead of 'last' in
the following.)

> +         size = (uintptr_t) hostaddrs[last] + sizes[last]
> +                - (uintptr_t) hostaddrs[i];
> +         struct_p = true;
> +       }
> +       break;
> +
> +     case GOMP_MAP_ATTACH:
> +       size = sizeof (void *);
> +       break;
> +
> +     default:
> +       size = sizes[i];
> +     }
> +
> +      n = lookup_host (acc_dev, hostaddrs[i], size);
> +

> +      if (n && struct_p)
> +     {
> +       if (n->refcount != REFCOUNT_INFINITY)
> +         n->refcount += groupnum - 1;
> +       n->dynamic_refcount += groupnum - 1;
> +       gomp_mutex_unlock (&acc_dev->lock);
> +     }

Is the 'GOMP_MAP_STRUCT' handling here specifically necessary, or is that
just an optimization of the 'n && groupnum > 1' case below?

> +      else if (n && groupnum == 1)
> +     {
> +       void *h = hostaddrs[i];
> +       size_t s = sizes[i];
> +
> +       /* A standalone attach clause.  */
> +       if ((kinds[i] & 0xff) == GOMP_MAP_ATTACH)
> +         gomp_attach_pointer (acc_dev, aq, &acc_dev->mem_map, n,
> +                              (uintptr_t) h, s, NULL);
> +       else if (h + s > (void *) n->host_end)
> +         {
> +           gomp_mutex_unlock (&acc_dev->lock);
> +           gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
> +         }
> +
> +       assert (n->refcount != REFCOUNT_LINK);
> +       if (n->refcount != REFCOUNT_INFINITY)
> +         n->refcount++;
> +       n->dynamic_refcount++;
> +
> +       gomp_mutex_unlock (&acc_dev->lock);
> +     }

> +      else if (n && groupnum > 1)
> +     {
> +       assert (n->refcount != REFCOUNT_INFINITY
> +               && n->refcount != REFCOUNT_LINK);
> +
> +       bool processed = false;
> +
> +       struct target_mem_desc *tgt = n->tgt;
> +       for (size_t j = 0; j < tgt->list_count; j++)
> +         if (tgt->list[j].key == n)
> +           {
> +             for (size_t k = 0; k < groupnum; k++)
> +               if (j + k < tgt->list_count && tgt->list[j + k].key)
> +                 {
> +                   tgt->list[j + k].key->refcount++;
> +                   tgt->list[j + k].key->dynamic_refcount++;
> +                 }
> +             processed = true;
> +           }
> +
> +       gomp_mutex_unlock (&acc_dev->lock);
> +       if (!processed)
> +         gomp_fatal ("dynamic refcount incrementing failed for "
> +                     "pointer/pset");
> +     }

Please add some text to explain the nested 'j', 'k' loops and their 'if'
conditionals, and the 'groupnum' usage in the 'k' loop boundary.  Should
the 'k' loop maybe run 'for (size_t k = j; k < tgt->list_count; ++k)'
(..., or is 'groupnum' relevant?), and in the loop body then use 'k'
instead of 'j + k'?  (Maybe I've now confused myself, staring at this for
a while...)

> +      else if (hostaddrs[i])
> +     {
> +       gomp_mutex_unlock (&acc_dev->lock);
> +
> +       struct target_mem_desc *tgt
> +         = gomp_map_vars_async (acc_dev, aq, groupnum, &hostaddrs[i], NULL,
> +                                &sizes[i], &kinds[i], true,
> +                                GOMP_MAP_VARS_ENTER_DATA);
> +       assert (tgt);
> +       for (size_t j = 0; j < tgt->list_count; j++)
> +         {
> +           n = tgt->list[j].key;
> +           if (n)
> +             n->dynamic_refcount++;
> +         }
> +     }

... else nothing.  This latter "nothing" case (not present, and no
'hostaddrs[i]') is exercised by
'libgomp.oacc-fortran/optional-data-enter-exit.f90' (only).  Is that
alright?

>
>        i = group_last;
>      }


> @@ -1137,45 +1241,40 @@ goacc_exit_data_internal (struct gomp_device_descr 
> *acc_dev, size_t mapnum,

(Diff slightly edited.)

>           if (n->refcount == 0)
> -           gomp_remove_var_async (acc_dev, n, aq);

> +           {
> +             if (aq)
> +               {
> +                 /* TODO The way the following code is currently
> +                    implemented, we need the 'is_tgt_unmapped' return
> +                    value from 'gomp_remove_var', so can't use
> +                    'gomp_remove_var_async' here -- see the
> +                    'gomp_unref_tgt' comment in
> +                    
> <http://mid.mail-archive.com/878snl36eu.fsf@euler.schwinge.homeip.net>;
> +                    PR92881 -- so have to synchronize here.  */
> +                 if (!acc_dev->openacc.async.synchronize_func (aq))
> +                   {
> +                     gomp_mutex_unlock (&acc_dev->lock);
> +                     gomp_fatal ("synchronize failed");
> +                   }
> +               }

As far as I understand, it's no longer true that "The way the following
code is [...] implemented, we need the 'is_tgt_unmapped' return value
from 'gomp_remove_var'".  In particular, we now can/should "use
'gomp_remove_var_async' here", and no longer "have to synchronize here"?

Indeed I'm happy to see that the logic below no longer depends on
'is_tgt_unmapped' for its loop exit condition.  Instead of the above,
this now can use the standard pattern:

    if (aq)
      /* TODO We can't do the 'is_tgt_unmapped' checking -- see the
         'gomp_unref_tgt' comment in
         <http://mid.mail-archive.com/878snl36eu.fsf@euler.schwinge.homeip.net>;
         PR92881.  */
      gomp_remove_var_async (acc_dev, n, aq);
    else
      { [as follows] }

> +             int num_mappings = 0;
> +             /* If the target_mem_desc represents a single data mapping, we
> +                can check that it is freed when this splay tree key's
> +                refcount reaches zero.  Otherwise (e.g. for a struct
> +                mapping with multiple members), fall back to skipping the
> +                test.  */
> +             for (int j = 0; j < n->tgt->list_count; j++)
> +               if (n->tgt->list[j].key)
> +                 num_mappings++;
> +             bool is_tgt_unmapped = gomp_remove_var (acc_dev, n);
> +             assert (num_mappings > 1 || is_tgt_unmapped);
> +           }
>         }
>         break;

For reference, the old logic (mandating what was described in the comment
above) was:

    bool is_tgt_unmapped = false;
    for (size_t i = 0; i < t->list_count; i++)
     {
       is_tgt_unmapped = gomp_remove_var (acc_dev, t->list[i].key);
       if (is_tgt_unmapped)
         break;
     }
    assert (is_tgt_unmapped);


Grüße
 Thomas
-----------------
Mentor Graphics (Deutschland) GmbH, Arnulfstraße 201, 80634 München / Germany
Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Alexander 
Walter

Reply via email to