On Fri, 10 Jan 2025, Alexandre Oliva wrote:

> 
> A narrowing conversion and a shift both drop bits from the loaded
> value, but we need to take into account which one comes first to get
> the right number of bits and mask.
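> 
> For illustration, the two orderings exercised by the new testcase below
> select different bits of the loaded short:
> 
>   unsigned short b;            /* 16-bit value loaded from memory */
>   ((unsigned char) b) >> 4;    /* narrow first: keeps bits 4..7 of b */
>   (unsigned char) (b >> 4);    /* shift first: keeps bits 4..11 of b */
> 
> so the bit position, size and mask have to be adjusted in the same order.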
> 
> Fold when applying masks to parts, comparing the parts, and combining
> the results, on the off chance either mask happens to be zero.
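> 
> For instance, when one of the masks is zero, (op & 0) folds to 0, the
> corresponding compare folds to a constant, and the combined result can
> then fold as well, roughly:
> 
>   ((x & 0) == 0) && ((y & 15) == 8)   /* folds down to (y & 15) == 8 */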
> 
> Regstrapped on x86_64-linux-gnu.  Ok to install?

OK.

Richard.

> 
> for  gcc/ChangeLog
> 
>       PR tree-optimization/118206
>       * gimple-fold.cc (decode_field_reference): Account for upper
>       bits dropped by narrowing conversions whether before or after
>       a right shift.
>       (fold_truth_andor_for_ifcombine): Fold masks, compares, and
>       combined results.
> 
> for  gcc/testsuite/ChangeLog
> 
>       PR tree-optimization/118206
>       * gcc.dg/field-merge-18.c: New.
> ---
>  gcc/gimple-fold.cc                    |   39 ++++++++++++++++++++++++----
>  gcc/testsuite/gcc.dg/field-merge-18.c |   46 +++++++++++++++++++++++++++++++++
>  2 files changed, 79 insertions(+), 6 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/field-merge-18.c
> 
> diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
> index c8a726e0ae3f3..d95f04213ee40 100644
> --- a/gcc/gimple-fold.cc
> +++ b/gcc/gimple-fold.cc
> @@ -7547,6 +7547,7 @@ decode_field_reference (tree *pexp, HOST_WIDE_INT *pbitsize,
>    int shiftrt = 0;
>    tree res_ops[2];
>    machine_mode mode;
> +  bool convert_before_shift = false;
>  
>    *load = NULL;
>    *psignbit = false;
> @@ -7651,6 +7652,12 @@ decode_field_reference (tree *pexp, HOST_WIDE_INT *pbitsize,
>        if (*load)
>       loc[3] = gimple_location (*load);
>        exp = res_ops[0];
> +      /* This looks backwards, but we're going back the def chain, so if we
> +      find the conversion here, after finding a shift, that's because the
> +      convert appears before the shift, and we should thus adjust the bit
> +      pos and size because of the shift after adjusting it due to type
> +      conversion.  */
> +      convert_before_shift = true;
>      }
>  
>    /* Identify the load, if there is one.  */
> @@ -7693,6 +7700,15 @@ decode_field_reference (tree *pexp, HOST_WIDE_INT *pbitsize,
>    *pvolatilep = volatilep;
>  
>    /* Adjust shifts...  */
> +  if (convert_before_shift
> +      && outer_type && *pbitsize > TYPE_PRECISION (outer_type))
> +    {
> +      HOST_WIDE_INT excess = *pbitsize - TYPE_PRECISION (outer_type);
> +      if (*preversep ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
> +     *pbitpos += excess;
> +      *pbitsize -= excess;
> +    }
> +
>    if (shiftrt)
>      {
>        if (!*preversep ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
> @@ -7701,7 +7717,8 @@ decode_field_reference (tree *pexp, HOST_WIDE_INT *pbitsize,
>      }
>  
>    /* ... and bit position.  */
> -  if (outer_type && *pbitsize > TYPE_PRECISION (outer_type))
> +  if (!convert_before_shift
> +      && outer_type && *pbitsize > TYPE_PRECISION (outer_type))
>      {
>        HOST_WIDE_INT excess = *pbitsize - TYPE_PRECISION (outer_type);
>        if (*preversep ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
> @@ -8377,6 +8394,8 @@ fold_truth_andor_for_ifcombine (enum tree_code code, tree truth_type,
>    if (get_best_mode (end_bit - first_bit, first_bit, 0, ll_end_region,
>                    ll_align, BITS_PER_WORD, volatilep, &lnmode))
>      l_split_load = false;
> +  /* ??? If ll and rl share the same load, reuse that?
> +     See PR 118206 -> gcc.dg/field-merge-18.c  */
>    else
>      {
>        /* Consider the possibility of recombining loads if any of the
> @@ -8757,11 +8776,11 @@ fold_truth_andor_for_ifcombine (enum tree_code code, tree truth_type,
>        /* Apply masks.  */
>        for (int j = 0; j < 2; j++)
>       if (mask[j] != wi::mask (0, true, mask[j].get_precision ()))
> -       op[j] = build2_loc (locs[j][2], BIT_AND_EXPR, type,
> -                           op[j], wide_int_to_tree (type, mask[j]));
> +       op[j] = fold_build2_loc (locs[j][2], BIT_AND_EXPR, type,
> +                                op[j], wide_int_to_tree (type, mask[j]));
>  
> -      cmp[i] = build2_loc (i ? rloc : lloc, wanted_code, truth_type,
> -                        op[0], op[1]);
> +      cmp[i] = fold_build2_loc (i ? rloc : lloc, wanted_code, truth_type,
> +                             op[0], op[1]);
>      }
>  
>    /* Reorder the compares if needed.  */
> @@ -8773,7 +8792,15 @@ fold_truth_andor_for_ifcombine (enum tree_code code, tree truth_type,
>    if (parts == 1)
>      result = cmp[0];
>    else if (!separatep || !maybe_separate)
> -    result = build2_loc (rloc, orig_code, truth_type, cmp[0], cmp[1]);
> +    {
> +      /* Only fold if any of the cmp is known, otherwise we may lose the
> +      sequence point, and that may prevent further optimizations.  */
> +      if (TREE_CODE (cmp[0]) == INTEGER_CST
> +       || TREE_CODE (cmp[1]) == INTEGER_CST)
> +     result = fold_build2_loc (rloc, orig_code, truth_type, cmp[0], cmp[1]);
> +      else
> +     result = build2_loc (rloc, orig_code, truth_type, cmp[0], cmp[1]);
> +    }
>    else
>      {
>        result = cmp[0];
> diff --git a/gcc/testsuite/gcc.dg/field-merge-18.c b/gcc/testsuite/gcc.dg/field-merge-18.c
> new file mode 100644
> index 0000000000000..e8413873d2418
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/field-merge-18.c
> @@ -0,0 +1,46 @@
> +/* { dg-do run } */
> +/* { dg-options "-O1" } */
> +
> +/* PR tree-optimization/118206 */
> +/* Check that shifts, whether before or after narrowing conversions, mask out
> +   the bits that are to be discarded.  */
> +
> +/* This only uses bits from the least significant byte in the short.  */
> +__attribute__((noipa)) int
> +foo (const void *x)
> +{
> +  unsigned short b;
> +  __builtin_memcpy (&b, x, sizeof (short));
> +  if ((b & 15) != 8)
> +    return 1;
> +  if ((((unsigned char) b) >> 4) > 7)
> +    return 1;
> +  return 0;
> +}
> +
> +__attribute__((noipa)) int
> +bar (const void *x)
> +{
> +  unsigned short b;
> +  __builtin_memcpy (&b, x, sizeof (short));
> +  if ((b & 15) != 8)
> +    return 1;
> +  if ((unsigned char)(b >> 4) > 7)
> +    return 1;
> +  return 0;
> +}
> +
> +int
> +main ()
> +{
> +  unsigned short a = 0x78 - 0x80 - 0x80;
> +  if (foo (&a) != 0 || bar (&a) != (a > 0xff))
> +    __builtin_abort ();
> +  unsigned short b = 0x88;
> +  if (foo (&b) != 1 || bar (&b) != 1)
> +    __builtin_abort ();
> +  unsigned short c = 8;
> +  if (foo (&c) != 0 || bar (&c) != 0)
> +    __builtin_abort ();
> +  return 0;
> +}
> 
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)
