On Tue, 24 Nov 2020, Jakub Jelinek wrote:

> Hi!
> 
> Currently the __builtin_clear_padding expansion code emits no code for
> full words that don't have any padding bits, and most of the time if
> the only padding bytes are from the start of the word it attempts to merge
> them with previous padding store (via {}) or if the only padding bytes are
> from the end of the word, it attempts to merge it with following padding
> bytes.  For everything else it was using a RMW, except when it found
> an aligned char/short/int covering all the padding bytes and all those
> padding bytes were all ones in that store.
> 
> The following patch changes it, such that we only use RMW if the padding has
> any bytes which have some padding and some non-padding bits (i.e. bitfields
> are involved), often it is the same amount of instructions in the end and
> avoids being thread-unsafe unless necessary (and avoids having to wait for
> the reads to make it into the CPU).  So, if there are no bitfields,
> the function will just store some zero bytes, shorts, ints, long longs etc.
> where needed.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK.

Richard.

> 2020-11-24  Jakub Jelinek  <ja...@redhat.com>
> 
>       * gimple-fold.c (clear_padding_flush): If a word contains only 0
>       or 0xff bytes of padding other than all set, all clear, all set
>       followed by all clear or all clear followed by all set, don't emit
>       a RMW operation on the whole word or parts of it, but instead
>       clear the individual bytes of padding.  For paddings of one byte
>       size, don't use char[1] and {}, but instead just char and 0.
> 
> --- gcc/gimple-fold.c.jj      2020-11-23 11:59:18.790279409 +0100
> +++ gcc/gimple-fold.c 2020-11-23 15:48:36.723510615 +0100
> @@ -4033,7 +4033,9 @@ clear_padding_flush (clear_padding_struc
>      {
>        size_t nonzero_first = wordsize;
>        size_t nonzero_last = 0;
> -      bool all_ones = true;
> +      size_t zero_first = wordsize;
> +      size_t zero_last = 0;
> +      bool all_ones = true, bytes_only = true;
>        if ((unsigned HOST_WIDE_INT) (buf->off + i + wordsize)
>         > (unsigned HOST_WIDE_INT) buf->sz)
>       {
> @@ -4055,9 +4057,19 @@ clear_padding_flush (clear_padding_struc
>               all_ones = false;
>             nonzero_last = j + 1 - i;
>           }
> +       else
> +         {
> +           if (zero_first == wordsize)
> +             zero_first = j - i;
> +           zero_last = j + 1 - i;
> +         }
>         if (buf->buf[j] != 0 && buf->buf[j] != (unsigned char) ~0)
> -         all_ones = false;
> +         {
> +           all_ones = false;
> +           bytes_only = false;
> +         }
>       }
> +      size_t padding_end = i;
>        if (padding_bytes)
>       {
>         if (nonzero_first == 0
> @@ -4069,7 +4081,6 @@ clear_padding_flush (clear_padding_struc
>             padding_bytes += wordsize;
>             continue;
>           }
> -       size_t padding_end = i;
>         if (all_ones && nonzero_first == 0)
>           {
>             padding_bytes += nonzero_last;
> @@ -4077,12 +4088,27 @@ clear_padding_flush (clear_padding_struc
>             nonzero_first = wordsize;
>             nonzero_last = 0;
>           }
> -       tree atype = build_array_type_nelts (char_type_node, padding_bytes);
> +       else if (bytes_only && nonzero_first == 0)
> +         {
> +           gcc_assert (zero_first && zero_first != wordsize);
> +           padding_bytes += zero_first;
> +           padding_end += zero_first;
> +         }
> +       tree atype, src;
> +       if (padding_bytes == 1)
> +         {
> +           atype = char_type_node;
> +           src = build_zero_cst (char_type_node);
> +         }
> +       else
> +         {
> +           atype = build_array_type_nelts (char_type_node, padding_bytes);
> +           src = build_constructor (atype, NULL);
> +         }
>         tree dst = build2_loc (buf->loc, MEM_REF, atype, buf->base,
>                                build_int_cst (buf->alias_type,
>                                               buf->off + padding_end
>                                               - padding_bytes));
> -       tree src = build_constructor (atype, NULL);
>         gimple *g = gimple_build_assign (dst, src);
>         gimple_set_location (g, buf->loc);
>         gsi_insert_before (buf->gsi, g, GSI_SAME_STMT);
> @@ -4099,6 +4125,45 @@ clear_padding_flush (clear_padding_struc
>         padding_bytes = nonzero_last - nonzero_first;
>         continue;
>       }
> +      if (bytes_only)
> +     {
> +       /* If bitfields aren't involved in this word, prefer storing
> +          individual bytes or groups of them over performing a RMW
> +          operation on the whole word.  */
> +       gcc_assert (i + zero_last <= end);
> +       for (size_t j = padding_end; j < i + zero_last; j++)
> +         {
> +           if (buf->buf[j])
> +             {
> +               size_t k;
> +               for (k = j; k < i + zero_last; k++)
> +                 if (buf->buf[k] == 0)
> +                   break;
> +               HOST_WIDE_INT off = buf->off + j;
> +               tree atype, src;
> +               if (k - j == 1)
> +                 {
> +                   atype = char_type_node;
> +                   src = build_zero_cst (char_type_node);
> +                 }
> +               else
> +                 {
> +                   atype = build_array_type_nelts (char_type_node, k - j);
> +                   src = build_constructor (atype, NULL);
> +                 }
> +               tree dst = build2_loc (buf->loc, MEM_REF, atype,
> +                                      buf->base,
> +                                      build_int_cst (buf->alias_type, off));
> +               gimple *g = gimple_build_assign (dst, src);
> +               gimple_set_location (g, buf->loc);
> +               gsi_insert_before (buf->gsi, g, GSI_SAME_STMT);
> +               j = k;
> +             }
> +         }
> +       if (nonzero_last == wordsize)
> +         padding_bytes = nonzero_last - zero_last;
> +       continue;
> +     }
>        for (size_t eltsz = 1; eltsz <= wordsize; eltsz <<= 1)
>       {
>         if (nonzero_last - nonzero_first <= eltsz
> @@ -4153,12 +4218,21 @@ clear_padding_flush (clear_padding_struc
>      {
>        if (padding_bytes)
>       {
> -       tree atype = build_array_type_nelts (char_type_node, padding_bytes);
> +       tree atype, src;
> +       if (padding_bytes == 1)
> +         {
> +           atype = char_type_node;
> +           src = build_zero_cst (char_type_node);
> +         }
> +       else
> +         {
> +           atype = build_array_type_nelts (char_type_node, padding_bytes);
> +           src = build_constructor (atype, NULL);
> +         }
>         tree dst = build2_loc (buf->loc, MEM_REF, atype, buf->base,
>                                build_int_cst (buf->alias_type,
>                                               buf->off + end
>                                               - padding_bytes));
> -       tree src = build_constructor (atype, NULL);
>         gimple *g = gimple_build_assign (dst, src);
>         gimple_set_location (g, buf->loc);
>         gsi_insert_before (buf->gsi, g, GSI_SAME_STMT);
> 
>       Jakub
> 
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imend

Reply via email to