cprop1 pass does not consider paradoxical subreg and for (insn 22) claims that it equals 8 elements of HImodeby setting REG_EQUAL note:
(insn 21 19 22 4 (set (reg:V4QI 98) (mem/u/c:V4QI (symbol_ref/u:DI ("*.LC1") [flags 0x2]) [0 S4 A32])) "pr110206.c":12:42 1530 {*movv4qi_internal} (expr_list:REG_EQUAL (const_vector:V4QI [ (const_int -52 [0xffffffffffffffcc]) repeated x4 ]) (nil))) (insn 22 21 23 4 (set (reg:V8HI 100) (zero_extend:V8HI (vec_select:V8QI (subreg:V16QI (reg:V4QI 98) 0) (parallel [ (const_int 0 [0]) (const_int 1 [0x1]) (const_int 2 [0x2]) (const_int 3 [0x3]) (const_int 4 [0x4]) (const_int 5 [0x5]) (const_int 6 [0x6]) (const_int 7 [0x7]) ])))) "pr110206.c":12:42 7471 {sse4_1_zero_extendv8qiv8hi2} (expr_list:REG_EQUAL (const_vector:V8HI [ (const_int 204 [0xcc]) repeated x8 ]) (expr_list:REG_DEAD (reg:V4QI 98) (nil)))) We rely on the "undefined" vals to have a specific value (from the earlier REG_EQUAL note) but actual code generation doesn't ensure this (it doesn't need to). That said, the issue isn't the constant folding per-se but that we do not actually constant fold but register an equality that doesn't hold. PR target/110206 gcc/ChangeLog: * fwprop.cc (contains_paradoxical_subreg_p): Move to ... * rtlanal.cc (contains_paradoxical_subreg_p): ... here. * rtlanal.h (contains_paradoxical_subreg_p): Add prototype. * cprop.cc (try_replace_reg): Do not set REG_EQUAL note when the original source contains a paradoxical subreg. gcc/testsuite/ChangeLog: * gcc.dg/torture/pr110206.c: New test. Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}. OK for mainline and backports? Uros.
diff --git a/gcc/cprop.cc b/gcc/cprop.cc index b7400c9a421..cf6facaa8c4 100644 --- a/gcc/cprop.cc +++ b/gcc/cprop.cc @@ -22,6 +22,7 @@ along with GCC; see the file COPYING3. If not see #include "coretypes.h" #include "backend.h" #include "rtl.h" +#include "rtlanal.h" #include "cfghooks.h" #include "df.h" #include "insn-config.h" @@ -795,7 +796,8 @@ try_replace_reg (rtx from, rtx to, rtx_insn *insn) /* If we've failed perform the replacement, have a single SET to a REG destination and don't yet have a note, add a REG_EQUAL note to not lose information. */ - if (!success && note == 0 && set != 0 && REG_P (SET_DEST (set))) + if (!success && note == 0 && set != 0 && REG_P (SET_DEST (set)) + && !contains_paradoxical_subreg_p (SET_SRC (set))) note = set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src)); } diff --git a/gcc/fwprop.cc b/gcc/fwprop.cc index ae342f59407..0707a234726 100644 --- a/gcc/fwprop.cc +++ b/gcc/fwprop.cc @@ -25,6 +25,7 @@ along with GCC; see the file COPYING3. If not see #include "coretypes.h" #include "backend.h" #include "rtl.h" +#include "rtlanal.h" #include "df.h" #include "rtl-ssa.h" @@ -353,21 +354,6 @@ reg_single_def_p (rtx x) return REG_P (x) && crtl->ssa->single_dominating_def (REGNO (x)); } -/* Return true if X contains a paradoxical subreg. */ - -static bool -contains_paradoxical_subreg_p (rtx x) -{ - subrtx_var_iterator::array_type array; - FOR_EACH_SUBRTX_VAR (iter, array, x, NONCONST) - { - x = *iter; - if (SUBREG_P (x) && paradoxical_subreg_p (x)) - return true; - } - return false; -} - /* Try to substitute (set DEST SRC), which defines DEF, into note NOTE of USE_INSN. Return the number of substitutions on success, otherwise return -1 and leave USE_INSN unchanged. diff --git a/gcc/rtlanal.cc b/gcc/rtlanal.cc index 31707f3b90a..8b48fc243a1 100644 --- a/gcc/rtlanal.cc +++ b/gcc/rtlanal.cc @@ -6970,3 +6970,18 @@ vec_series_lowpart_p (machine_mode result_mode, machine_mode op_mode, rtx sel) } return false; } + +/* Return true if X contains a paradoxical subreg. */ + +bool +contains_paradoxical_subreg_p (rtx x) +{ + subrtx_var_iterator::array_type array; + FOR_EACH_SUBRTX_VAR (iter, array, x, NONCONST) + { + x = *iter; + if (SUBREG_P (x) && paradoxical_subreg_p (x)) + return true; + } + return false; +} diff --git a/gcc/rtlanal.h b/gcc/rtlanal.h index 9013e75c04b..4f0dea8e99f 100644 --- a/gcc/rtlanal.h +++ b/gcc/rtlanal.h @@ -338,4 +338,6 @@ vec_series_highpart_p (machine_mode result_mode, machine_mode op_mode, bool vec_series_lowpart_p (machine_mode result_mode, machine_mode op_mode, rtx sel); +bool +contains_paradoxical_subreg_p (rtx x); #endif diff --git a/gcc/testsuite/gcc.dg/torture/pr110206.c b/gcc/testsuite/gcc.dg/torture/pr110206.c new file mode 100644 index 00000000000..3a4f221ef47 --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr110206.c @@ -0,0 +1,30 @@ +/* PR target/110206 */ +/* { dg-do run { target x86_64-*-* i?86-*-* } } */ + +typedef unsigned char __attribute__((__vector_size__ (4))) U; +typedef unsigned char __attribute__((__vector_size__ (8))) V; +typedef unsigned short u16; + +V g; + +void +__attribute__((noinline)) +foo (U u, u16 c, V *r) +{ + if (!c) + __builtin_abort (); + V x = __builtin_shufflevector (u, (204 >> u), 7, 0, 5, 1, 3, 5, 0, 2); + V y = __builtin_shufflevector (g, (V) { }, 7, 6, 6, 7, 2, 6, 3, 5); + V z = __builtin_shufflevector (y, 204 * x, 3, 9, 8, 1, 4, 6, 14, 5); + *r = z; +} + +int +main (void) +{ + V r; + foo ((U){4}, 5, &r); + if (r[6] != 0x30) + __builtin_abort(); + return 0; +}