On Fri, 10 Jan 2025, Alexandre Oliva wrote:

> A narrowing conversion and a shift both drop bits from the loaded
> value, but we need to take into account which one comes first to get
> the right number of bits and mask.
>
> Fold when applying masks to parts, comparing the parts, and combining
> the results, in the odd chance either mask happens to be zero.
>
> Regstrapped on x86_64-linux-gnu.  Ok to install?
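For reference, a minimal sketch (not part of the patch) of the two orderings the description distinguishes, using the first input value from the new testcase below; the variable names are illustrative only:

    unsigned short b = 0xff78;                        /* 0x78 - 0x80 - 0x80 as unsigned short */
    int conv_then_shift = ((unsigned char) b) >> 4;   /* high byte dropped first: 0x78 >> 4 == 7 */
    int shift_then_conv = (unsigned char) (b >> 4);   /* shift first: 0xff7, then narrowed to 0xf7 == 247 */

The two forms keep different bits of the loaded short, so the field-merging code has to know whether the narrowing conversion or the right shift came first when computing the bit position, size, and mask.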
OK.

Richard.

> for gcc/ChangeLog
>
> 	PR tree-optimization/118206
> 	* gimple-fold.cc (decode_field_reference): Account for upper
> 	bits dropped by narrowing conversions whether before or after
> 	a right shift.
> 	(fold_truth_andor_for_ifcombine): Fold masks, compares, and
> 	combined results.
>
> for gcc/testsuite/ChangeLog
>
> 	PR tree-optimization/118206
> 	* gcc.dg/field-merge-18.c: New.
> ---
>  gcc/gimple-fold.cc                    | 39 ++++++++++++++++++++++++----
>  gcc/testsuite/gcc.dg/field-merge-18.c | 46 +++++++++++++++++++++++++++++++++
>  2 files changed, 79 insertions(+), 6 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/field-merge-18.c
>
> diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
> index c8a726e0ae3f3..d95f04213ee40 100644
> --- a/gcc/gimple-fold.cc
> +++ b/gcc/gimple-fold.cc
> @@ -7547,6 +7547,7 @@ decode_field_reference (tree *pexp, HOST_WIDE_INT *pbitsize,
>    int shiftrt = 0;
>    tree res_ops[2];
>    machine_mode mode;
> +  bool convert_before_shift = false;
>
>    *load = NULL;
>    *psignbit = false;
> @@ -7651,6 +7652,12 @@ decode_field_reference (tree *pexp, HOST_WIDE_INT *pbitsize,
>        if (*load)
> 	loc[3] = gimple_location (*load);
>        exp = res_ops[0];
> +      /* This looks backwards, but we're going back the def chain, so if we
> +	 find the conversion here, after finding a shift, that's because the
> +	 convert appears before the shift, and we should thus adjust the bit
> +	 pos and size because of the shift after adjusting it due to type
> +	 conversion.  */
> +      convert_before_shift = true;
>      }
>
>    /* Identify the load, if there is one.  */
> @@ -7693,6 +7700,15 @@ decode_field_reference (tree *pexp, HOST_WIDE_INT *pbitsize,
>    *pvolatilep = volatilep;
>
>    /* Adjust shifts...  */
> +  if (convert_before_shift
> +      && outer_type && *pbitsize > TYPE_PRECISION (outer_type))
> +    {
> +      HOST_WIDE_INT excess = *pbitsize - TYPE_PRECISION (outer_type);
> +      if (*preversep ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
> +	*pbitpos += excess;
> +      *pbitsize -= excess;
> +    }
> +
>    if (shiftrt)
>      {
>        if (!*preversep ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
> @@ -7701,7 +7717,8 @@ decode_field_reference (tree *pexp, HOST_WIDE_INT *pbitsize,
>      }
>
>    /* ... and bit position.  */
> -  if (outer_type && *pbitsize > TYPE_PRECISION (outer_type))
> +  if (!convert_before_shift
> +      && outer_type && *pbitsize > TYPE_PRECISION (outer_type))
>      {
>        HOST_WIDE_INT excess = *pbitsize - TYPE_PRECISION (outer_type);
>        if (*preversep ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
> @@ -8377,6 +8394,8 @@ fold_truth_andor_for_ifcombine (enum tree_code code, tree truth_type,
>    if (get_best_mode (end_bit - first_bit, first_bit, 0, ll_end_region,
> 		     ll_align, BITS_PER_WORD, volatilep, &lnmode))
>      l_split_load = false;
> +  /* ??? If ll and rl share the same load, reuse that?
> +     See PR 118206 -> gcc.dg/field-merge-18.c  */
>    else
>      {
>        /* Consider the possibility of recombining loads if any of the
> @@ -8757,11 +8776,11 @@ fold_truth_andor_for_ifcombine (enum tree_code code, tree truth_type,
>        /* Apply masks.  */
>        for (int j = 0; j < 2; j++)
> 	if (mask[j] != wi::mask (0, true, mask[j].get_precision ()))
> -	  op[j] = build2_loc (locs[j][2], BIT_AND_EXPR, type,
> -			      op[j], wide_int_to_tree (type, mask[j]));
> +	  op[j] = fold_build2_loc (locs[j][2], BIT_AND_EXPR, type,
> +				   op[j], wide_int_to_tree (type, mask[j]));
>
> -      cmp[i] = build2_loc (i ? rloc : lloc, wanted_code, truth_type,
> -			   op[0], op[1]);
> +      cmp[i] = fold_build2_loc (i ? rloc : lloc, wanted_code, truth_type,
> +				op[0], op[1]);
>      }
>
>    /* Reorder the compares if needed.  */
> @@ -8773,7 +8792,15 @@ fold_truth_andor_for_ifcombine (enum tree_code code, tree truth_type,
>    if (parts == 1)
>      result = cmp[0];
>    else if (!separatep || !maybe_separate)
> -    result = build2_loc (rloc, orig_code, truth_type, cmp[0], cmp[1]);
> +    {
> +      /* Only fold if any of the cmp is known, otherwise we may lose the
> +	 sequence point, and that may prevent further optimizations.  */
> +      if (TREE_CODE (cmp[0]) == INTEGER_CST
> +	  || TREE_CODE (cmp[1]) == INTEGER_CST)
> +	result = fold_build2_loc (rloc, orig_code, truth_type, cmp[0], cmp[1]);
> +      else
> +	result = build2_loc (rloc, orig_code, truth_type, cmp[0], cmp[1]);
> +    }
>    else
>      {
>        result = cmp[0];
> diff --git a/gcc/testsuite/gcc.dg/field-merge-18.c b/gcc/testsuite/gcc.dg/field-merge-18.c
> new file mode 100644
> index 0000000000000..e8413873d2418
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/field-merge-18.c
> @@ -0,0 +1,46 @@
> +/* { dg-do run } */
> +/* { dg-options "-O1" } */
> +
> +/* PR tree-optimization/118206 */
> +/* Check that shifts, whether before or after narrowing conversions, mask out
> +   the bits that are to be discarded.  */
> +
> +/* This only uses bits from the least significant byte in the short.  */
> +__attribute__((noipa)) int
> +foo (const void *x)
> +{
> +  unsigned short b;
> +  __builtin_memcpy (&b, x, sizeof (short));
> +  if ((b & 15) != 8)
> +    return 1;
> +  if ((((unsigned char) b) >> 4) > 7)
> +    return 1;
> +  return 0;
> +}
> +
> +__attribute__((noipa)) int
> +bar (const void *x)
> +{
> +  unsigned short b;
> +  __builtin_memcpy (&b, x, sizeof (short));
> +  if ((b & 15) != 8)
> +    return 1;
> +  if ((unsigned char)(b >> 4) > 7)
> +    return 1;
> +  return 0;
> +}
> +
> +int
> +main ()
> +{
> +  unsigned short a = 0x78 - 0x80 - 0x80;
> +  if (foo (&a) != 0 || bar (&a) != (a > 0xff))
> +    __builtin_abort ();
> +  unsigned short b = 0x88;
> +  if (foo (&b) != 1 || bar (&b) != 1)
> +    __builtin_abort ();
> +  unsigned short c = 8;
> +  if (foo (&c) != 0 || bar (&c) != 0)
> +    __builtin_abort ();
> +  return 0;
> +}

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)
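For reference, a quick walk-through (arithmetic mine, not from the mail) of why main () in the testcase expects foo (&a) == 0 and bar (&a) == 1 for its first input:

    a = 0x78 - 0x80 - 0x80               /* == (unsigned short) -0x88 == 0xff78 */
    foo: ((unsigned char) 0xff78) >> 4   /* == 0x78 >> 4 == 7, not > 7, so foo returns 0 */
    bar: (unsigned char) (0xff78 >> 4)   /* == (unsigned char) 0xff7 == 0xf7 == 247 > 7, so bar returns 1 */

Since 0xff78 > 0xff, bar (&a) matches the (a > 0xff) expectation; a fold that confused the two orderings would trip the __builtin_abort calls.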