Hi! The following testcase is miscompiled, because due to various disabled optimization passes we end up with a dead bsf instruction (CTZ) of a register known to be zero. fold_rtx uses simplify_unary_operation, which has in this case: case CTZ: if (wi::ne_p (op0, 0)) int_value = wi::ctz (op0); else if (! CTZ_DEFINED_VALUE_AT_ZERO (imode, int_value)) int_value = GET_MODE_PRECISION (imode); result = wi::shwi (int_value, result_mode); break; x86_64 is a target where CTZ_DEFINED_VALUE_AT_ZERO is false, the instruction keeps previous value of the destination register, so something pretty random. As it is undefined, simplifying it to something random is fine, except when used the way CSE uses it, by remembering that the value (const_int 32) is stored in the destination register and optimizing later code that has (set some_reg (const_int 32)) to that destination register. Beucase that destination register contains an indeterminate value, we can't expect it will be exactly 32.
The following patch let us punt in these cases. Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? Another option would be to tweak simplify-rtx.c and instead of doing else if (! CTZ_DEFINED_VALUE_AT_ZERO (imode, int_value)) int_value = GET_MODE_PRECISION (imode); do else if (! CTZ_DEFINED_VALUE_AT_ZERO (imode, int_value)) return NULL_RTX; and similarly for CLZ, haven't tested what would break if anything; we've been doing something like that since r62453 when the C?Z_DEFINED_VALUE_AT_ZERO macros have been introduced, and before that actually the same, just unconditionally assumed the value is undefined at 0. 2018-04-12 Jakub Jelinek <ja...@redhat.com> PR rtl-optimization/85376 * cse.c (fold_rtx): For CLZ and CTZ don't try to simplify if the source is known to be zero and CLZ/CTZ is not defined at zero for the target. * gcc.dg/pr85376.c: New test. --- gcc/cse.c.jj 2018-02-12 23:24:47.350482694 +0100 +++ gcc/cse.c 2018-04-12 17:49:32.157664289 +0200 @@ -3322,6 +3322,19 @@ fold_rtx (rtx x, rtx_insn *insn) && mode_arg0 == VOIDmode) break; + /* Avoid recording a constant value for CLZ or CTZ if the argument is + known to be zero when the operation is undefined for zero on the + target. See PR85376. */ + if ((code == CLZ || code == CTZ) + && ((const_arg0 ? const_arg0 : folded_arg0) == CONST0_RTX (mode))) + { + int dummy; + scalar_mode imode = GET_MODE_INNER (mode); + if ((code == CLZ && !CLZ_DEFINED_VALUE_AT_ZERO (imode, dummy)) + || (code == CTZ && !CTZ_DEFINED_VALUE_AT_ZERO (imode, dummy))) + break; + } + new_rtx = simplify_unary_operation (code, mode, const_arg0 ? const_arg0 : folded_arg0, mode_arg0); --- gcc/testsuite/gcc.dg/pr85376.c.jj 2018-04-12 17:44:41.506370642 +0200 +++ gcc/testsuite/gcc.dg/pr85376.c 2018-04-12 17:45:11.669401115 +0200 @@ -0,0 +1,32 @@ +/* PR rtl-optimization/85376 */ +/* { dg-do run { target int128 } } */ +/* { dg-options "-Og -fno-dce -fgcse -fno-tree-ccp -fno-tree-copy-prop -Wno-psabi" } */ + +typedef unsigned int U __attribute__ ((vector_size (64))); +typedef unsigned __int128 V __attribute__ ((vector_size (64))); +unsigned int e, i, l; +unsigned char f; +U g, h, k, j; + +static inline V +foo (unsigned char n, unsigned short o, unsigned int p, U q, U r, U s) +{ + unsigned int t; + o <<= 5; + q[7] >>= __builtin_add_overflow (0xfffffff0, __builtin_ffs (n), &s[5]); + t = __builtin_ffs (g[7]); + e *= __builtin_sub_overflow (o, t, &f); + return f + (V) g + (V) h + (V) q + i + (V) j + (V) s + (V) k + l; +} + +int +main () +{ + if (__SIZEOF_INT128__ != 16 || __SIZEOF_INT__ != 4 || __CHAR_BIT__ != 8) + return 0; + V x = foo (0, 1, 5, (U) { }, (U) { }, (U) { }); + for (unsigned i = 0; i < 4; i++) + if ((unsigned int) x[i] != 0x20) + __builtin_abort (); + return 0; +} Jakub