The following fixes a latent issue in the match.pd variant of fold_sign_changed_comparison, which replaces an unsigned integer comparison with a signed boolean comparison of the same precision, despite the fact that we treat BOOLEAN_TYPEs as having only two valid values.
Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed to trunk so far. 2022-06-28 Richard Biener <rguent...@suse.de> PR middle-end/106053 * match.pd ((T)a == (T)b): Avoid folding away sign changes in a comparison if we'd truncate to a boolean. * gcc.target/i386/pr106053.c: New testcase. --- gcc/match.pd | 7 ++++- gcc/testsuite/gcc.target/i386/pr106053.c | 37 ++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr106053.c diff --git a/gcc/match.pd b/gcc/match.pd index 24cbbbb5bc1..c43c528424e 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -5542,7 +5542,12 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) && (TYPE_UNSIGNED (TREE_TYPE (@00)) == TYPE_UNSIGNED (TREE_TYPE (@0)) || cmp == NE_EXPR || cmp == EQ_EXPR) - && !POINTER_TYPE_P (TREE_TYPE (@00))) + && !POINTER_TYPE_P (TREE_TYPE (@00)) + /* (int)bool:32 != (int)uint is not the same as + bool:32 != (bool:32)uint since boolean types only have two valid + values independent of their precision. */ + && (TREE_CODE (TREE_TYPE (@00)) != BOOLEAN_TYPE + || TREE_CODE (TREE_TYPE (@10)) == BOOLEAN_TYPE)) /* ??? The special-casing of INTEGER_CST conversion was in the original code and here to avoid a spurious overflow flag on the resulting constant which fold_convert produces. 
*/ diff --git a/gcc/testsuite/gcc.target/i386/pr106053.c b/gcc/testsuite/gcc.target/i386/pr106053.c new file mode 100644 index 00000000000..69baa3e5fd1 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr106053.c @@ -0,0 +1,37 @@ +/* { dg-do run { target lp64 } } */ +/* { dg-options "-O -fno-tree-fre -w -mno-avx" } */ + +typedef unsigned __attribute__((__vector_size__ (32))) v256u8; +typedef unsigned __attribute__((__vector_size__ (64))) v512u8; +typedef unsigned short u16; +typedef unsigned int u32; +typedef unsigned long long u64; +typedef unsigned long __attribute__((__vector_size__ (64))) v512u64; +typedef unsigned __int128 __attribute__((__vector_size__ (32))) v256u128; +unsigned u; +v512u64 foo0_v512u64_0; + +static inline v256u8 +foo (u32 u32_0, u64 u64_0, v256u128 v256u128_0) +{ + int o = __builtin_add_overflow_p (u64_0, 0, 0); + v512u64 v512u64_1 = + foo0_v512u64_0 & (u32) __builtin_sub_overflow_p (0, o, 0); + u64_0 |= u; + v256u128 v256u128_2 = u64_0 < v256u128_0; + v256u128 v256u128_3 = -v256u128_2 == u64_0 * u32_0; + v256u8 v256u8_r = ((union { + v512u8 a; v256u8 b[2]; + }) (v512u8) v512u64_1).b[0] + (v256u8) v256u128_3; + return v256u8_r; +} + +int +main (void) +{ + v256u8 x = foo (3095179400, 23725760132, (v256u128) { 2, 2 }); + for (unsigned i = 0; i < sizeof (x) / sizeof (x[0]); i++) + if (x[i]) + __builtin_abort (); + return 0; +} -- 2.35.3