On Tue, Jun 10, 2014 at 10:49 AM, Thomas Preud'homme <thomas.preudho...@arm.com> wrote: >> From: Richard Biener [mailto:richard.guent...@gmail.com] >> Sent: Wednesday, June 04, 2014 5:39 PM >> To: Thomas Preud'homme >> > >> >> I'm failing to get why you possibly need two casts ... you should >> only need one, from the bswap/load result to the final type >> (zero-extended as you say - so the load type should simply be >> unsigned which it is already). > > You are right indeed. I failed to realize that the problems I > encountered were caused by an initially wrong understanding of > the reason behind PR61306. All this code is not necessary. > >> >> So I think that the testcase in the patch is fixed already by >> doing the n->type change (and a proper sign-extension detection). >> >> Can you please split that part out? > > Doing so I realize the patch was incomplete. Sign extension can > be triggered in two distinct place in the code (right shift and cast) > that can both lead to incorrect code being generated. With some > efforts I managed to create two testcases that work both on > GCC trunk but also GCC 4.9 and 4.8. > > ChangeLog entries are: > > *** gcc/ChangeLog *** > > 2014-06-05 Thomas Preud'homme <thomas.preudho...@arm.com> > > PR tree-optimization/61306 > * tree-ssa-math-opts.c (struct symbolic_number): Store type of > expression instead of its size. > (do_shift_rotate): Adapt to change in struct symbolic_number. Return > false to prevent optimization when the result is unpredictable due to > arithmetic right shift of signed type with highest byte is set. > (verify_symbolic_number_p): Adapt to change in struct symbolic_number. > (init_symbolic_number): Likewise. > (find_bswap_or_nop_1): Likewise. Return NULL to prevent optimization > when the result is unpredictable due to sign extension. > > *** gcc/testsuite/ChangeLog *** > > 2014-06-05 Thomas Preud'homme <thomas.preudho...@arm.com> > > * gcc.c-torture/execute/pr61306-1.c: New test. 
> * gcc.c-torture/execute/pr61306-2.c: Likewise. > * gcc.c-torture/execute/pr61306-3.c: Likewise. > > > diff --git a/gcc/testsuite/gcc.c-torture/execute/pr61306-1.c > b/gcc/testsuite/gcc.c-torture/execute/pr61306-1.c > new file mode 100644 > index 0000000..f6e8ff3 > --- /dev/null > +++ b/gcc/testsuite/gcc.c-torture/execute/pr61306-1.c > @@ -0,0 +1,39 @@ > +#ifdef __INT32_TYPE__ > +typedef __INT32_TYPE__ int32_t; > +#else > +typedef int int32_t; > +#endif > + > +#ifdef __UINT32_TYPE__ > +typedef __UINT32_TYPE__ uint32_t; > +#else > +typedef unsigned uint32_t; > +#endif > + > +#define __fake_const_swab32(x) ((uint32_t)( \ > + (((uint32_t)(x) & (uint32_t)0x000000ffUL) << 24) | \ > + (((uint32_t)(x) & (uint32_t)0x0000ff00UL) << 8) | \ > + (((uint32_t)(x) & (uint32_t)0x00ff0000UL) >> 8) | \ > + (( (int32_t)(x) & (int32_t)0xff000000UL) >> 24))) > + > +/* Previous version of bswap optimization failed to consider sign extension > + and as a result would replace an expression *not* doing a bswap by a > + bswap. 
*/ > + > +__attribute__ ((noinline, noclone)) uint32_t > +fake_bswap32 (uint32_t in) > +{ > + return __fake_const_swab32 (in); > +} > + > +int > +main(void) > +{ > + if (sizeof (int32_t) * __CHAR_BIT__ != 32) > + return 0; > + if (sizeof (uint32_t) * __CHAR_BIT__ != 32) > + return 0; > + if (fake_bswap32 (0x87654321) != 0xffffff87) > + __builtin_abort (); > + return 0; > +} > diff --git a/gcc/testsuite/gcc.c-torture/execute/pr61306-2.c > b/gcc/testsuite/gcc.c-torture/execute/pr61306-2.c > new file mode 100644 > index 0000000..6cbbd19 > --- /dev/null > +++ b/gcc/testsuite/gcc.c-torture/execute/pr61306-2.c > @@ -0,0 +1,40 @@ > +#ifdef __INT16_TYPE__ > +typedef __INT16_TYPE__ int16_t; > +#else > +typedef short int16_t; > +#endif > + > +#ifdef __UINT32_TYPE__ > +typedef __UINT32_TYPE__ uint32_t; > +#else > +typedef unsigned uint32_t; > +#endif > + > +#define __fake_const_swab32(x) ((uint32_t)( \ > + (((uint32_t) (x) & (uint32_t)0x000000ffUL) << 24) | \ > + (((uint32_t)(int16_t)(x) & (uint32_t)0x00ffff00UL) << 8) | \ > + (((uint32_t) (x) & (uint32_t)0x00ff0000UL) >> 8) | \ > + (((uint32_t) (x) & (uint32_t)0xff000000UL) >> 24))) > + > + > +/* Previous version of bswap optimization failed to consider sign extension > + and as a result would replace an expression *not* doing a bswap by a > + bswap. 
*/ > + > +__attribute__ ((noinline, noclone)) uint32_t > +fake_bswap32 (uint32_t in) > +{ > + return __fake_const_swab32 (in); > +} > + > +int > +main(void) > +{ > + if (sizeof (uint32_t) * __CHAR_BIT__ != 32) > + return 0; > + if (sizeof (int16_t) * __CHAR_BIT__ != 16) > + return 0; > + if (fake_bswap32 (0x81828384) != 0xff838281) > + __builtin_abort (); > + return 0; > +} > diff --git a/gcc/testsuite/gcc.c-torture/execute/pr61306-3.c > b/gcc/testsuite/gcc.c-torture/execute/pr61306-3.c > new file mode 100644 > index 0000000..6086e27 > --- /dev/null > +++ b/gcc/testsuite/gcc.c-torture/execute/pr61306-3.c > @@ -0,0 +1,13 @@ > +short a = -1; > +int b; > +char c; > + > +int > +main () > +{ > + c = a; > + b = a | c; > + if (b != -1) > + __builtin_abort (); > + return 0; > +} > diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c > index 658b341..d92c433 100644 > --- a/gcc/tree-ssa-math-opts.c > +++ b/gcc/tree-ssa-math-opts.c > @@ -1622,7 +1622,7 @@ make_pass_cse_sincos (gcc::context *ctxt) > > struct symbolic_number { > uint64_t n; > - int size; > + tree type; > tree base_addr; > tree offset; > HOST_WIDE_INT bytepos; > @@ -1652,13 +1652,15 @@ do_shift_rotate (enum tree_code code, > struct symbolic_number *n, > int count) > { > + int bitsize = TYPE_PRECISION (n->type); > + > if (count % 8 != 0) > return false; > > /* Zero out the extra bits of N in order to avoid them being shifted > into the significant bits. */ > - if (n->size < (int)sizeof (int64_t)) > - n->n &= ((uint64_t)1 << (n->size * BITS_PER_UNIT)) - 1; > + if (bitsize < 8 * (int)sizeof (int64_t)) > + n->n &= ((uint64_t)1 << bitsize) - 1; > > switch (code) > { > @@ -1666,20 +1668,23 @@ do_shift_rotate (enum tree_code code, > n->n <<= count; > break; > case RSHIFT_EXPR: > + /* Arithmetic shift of signed type: result is dependent on the value. 
> */ > + if (!TYPE_UNSIGNED (n->type) && (n->n & (0xff << (bitsize - 8)))) > + return false; > n->n >>= count; > break; > case LROTATE_EXPR: > - n->n = (n->n << count) | (n->n >> ((n->size * BITS_PER_UNIT) - count)); > + n->n = (n->n << count) | (n->n >> (bitsize - count)); > break; > case RROTATE_EXPR: > - n->n = (n->n >> count) | (n->n << ((n->size * BITS_PER_UNIT) - count)); > + n->n = (n->n >> count) | (n->n << (bitsize - count)); > break; > default: > return false; > } > /* Zero unused bits for size. */ > - if (n->size < (int)sizeof (int64_t)) > - n->n &= ((uint64_t)1 << (n->size * BITS_PER_UNIT)) - 1; > + if (bitsize < 8 * (int)sizeof (int64_t)) > + n->n &= ((uint64_t)1 << bitsize) - 1; > return true; > } > > @@ -1696,7 +1701,7 @@ verify_symbolic_number_p (struct symbolic_number *n, > gimple stmt) > if (TREE_CODE (lhs_type) != INTEGER_TYPE) > return false; > > - if (TYPE_PRECISION (lhs_type) != n->size * BITS_PER_UNIT) > + if (TYPE_PRECISION (lhs_type) != TYPE_PRECISION (n->type)) > return false; > > return true; > @@ -1708,20 +1713,23 @@ verify_symbolic_number_p (struct symbolic_number *n, > gimple stmt) > static bool > init_symbolic_number (struct symbolic_number *n, tree src) > { > + int size; > + > n->base_addr = n->offset = n->alias_set = n->vuse = NULL_TREE; > > /* Set up the symbolic number N by setting each byte to a value between 1 > and > the byte size of rhs1. The highest order byte is set to n->size and the > lowest order byte to 1. 
*/ > - n->size = TYPE_PRECISION (TREE_TYPE (src)); > - if (n->size % BITS_PER_UNIT != 0) > + n->type = TREE_TYPE (src); > + size = TYPE_PRECISION (n->type); > + if (size % BITS_PER_UNIT != 0) > return false; > - n->size /= BITS_PER_UNIT; > - n->range = n->size; > + size /= BITS_PER_UNIT; > + n->range = size; > n->n = CMPNOP; > > - if (n->size < (int)sizeof (int64_t)) > - n->n &= ((uint64_t)1 << (n->size * BITS_PER_UNIT)) - 1; > + if (size < (int)sizeof (int64_t)) > + n->n &= ((uint64_t)1 << (size * BITS_PER_UNIT)) - 1; > > return true; > } > @@ -1857,12 +1865,12 @@ find_bswap_or_nop_1 (gimple stmt, struct > symbolic_number *n, int limit) > { > case BIT_AND_EXPR: > { > - int i; > + int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT; > uint64_t val = int_cst_value (rhs2); > uint64_t tmp = val; > > /* Only constants masking full bytes are allowed. */ > - for (i = 0; i < n->size; i++, tmp >>= BITS_PER_UNIT) > + for (i = 0; i < size; i++, tmp >>= BITS_PER_UNIT) > if ((tmp & 0xff) != 0 && (tmp & 0xff) != 0xff) > return NULL_TREE; > > @@ -1878,21 +1886,30 @@ find_bswap_or_nop_1 (gimple stmt, struct > symbolic_number *n, int limit) > break; > CASE_CONVERT: > { > - int type_size; > + int type_size, old_type_size; > + tree type; > > - type_size = TYPE_PRECISION (gimple_expr_type (stmt)); > + type = gimple_expr_type (stmt); > + type_size = TYPE_PRECISION (type); > if (type_size % BITS_PER_UNIT != 0) > return NULL_TREE; > > + /* Sign extension: result is dependent on the value. */ > + old_type_size = TYPE_PRECISION (n->type); > + if (!TYPE_UNSIGNED (n->type) > + && type_size > old_type_size > + && n->n & (0xff << (old_type_size - 8))) > + return NULL_TREE; > + > if (type_size / BITS_PER_UNIT < (int)(sizeof (int64_t))) > { > /* If STMT casts to a smaller type mask out the bits not > belonging to the target type. 
*/ > n->n &= ((uint64_t)1 << type_size) - 1; > } > - n->size = type_size / BITS_PER_UNIT; > + n->type = type; > if (!n->base_addr) > - n->range = n->size; > + n->range = type_size / BITS_PER_UNIT; > } > break; > default: > @@ -1905,7 +1922,7 @@ find_bswap_or_nop_1 (gimple stmt, struct > symbolic_number *n, int limit) > > if (rhs_class == GIMPLE_BINARY_RHS) > { > - int i; > + int i, size; > struct symbolic_number n1, n2; > uint64_t mask; > tree source_expr2; > @@ -1931,7 +1948,7 @@ find_bswap_or_nop_1 (gimple stmt, struct > symbolic_number *n, int limit) > if (!source_expr2) > return NULL_TREE; > > - if (n1.size != n2.size) > + if (TYPE_PRECISION (n1.type) != TYPE_PRECISION (n2.type)) > return NULL_TREE; > > if (!n1.vuse != !n2.vuse || > @@ -1997,8 +2014,9 @@ find_bswap_or_nop_1 (gimple stmt, struct > symbolic_number *n, int limit) > n->base_addr = n1.base_addr; > n->offset = n1.offset; > n->bytepos = n1.bytepos; > - n->size = n1.size; > - for (i = 0, mask = 0xff; i < n->size; i++, mask <<= BITS_PER_UNIT) > + n->type = n1.type; > + size = TYPE_PRECISION (n->type) / BITS_PER_UNIT; > + for (i = 0, mask = 0xff; i < size; i++, mask <<= BITS_PER_UNIT) > { > uint64_t masked1, masked2; > > > Is this OK for trunk? Does this bug qualify for a backport patch to > 4.8 and 4.9 branches?
This is ok for trunk and also for backporting (after a short while to see if there is any fallout). Thanks, Richard. > Best regards, > > Thomas > >