Hi, this patch adds matching of non-constant CONSTRUCTOR expressions into operand_equal_p. As discussed with Richard, those can happen when we are building vectors out of components. I also added a testcase that triggers this path and gets folded bit earlier (tree-ssa fold it in tree-pre eventually even at mainline). Again, this will become more relevant for ipa-icf. The code has few positives in testsuite (mostly in autovec), none in bootstrap.
The code can't handle incomplete ctors that match complete: typedef char __attribute__ ((vector_size (4))) v4qi; v4qi v; void ret(char a) { v4qi c={a,a,0,0},d={a,a}; v = (c!=d); } gets compiled as (optimized dump): ;; Function ret (ret, funcdef_no=0, decl_uid=1758, cgraph_uid=0, symbol_order=1) ret (char a) { v4qi d; v4qi c; vector(4) signed char _4; char _7; char _8; signed char _9; char _10; char _11; signed char _12; char _13; char _14; signed char _15; char _16; char _17; signed char _18; <bb 2>: c_2 = {a_1(D), a_1(D), 0, 0}; d_3 = {a_1(D), a_1(D)}; _7 = a_1(D); _8 = a_1(D); _9 = 0; _10 = a_1(D); _11 = a_1(D); _12 = 0; _13 = 0; _14 = 0; _15 = 0; _16 = 0; _17 = 0; _18 = 0; _4 = {_9, _12, _15, _18}; v = _4; return; } This is produced by forwprop4 by: <bb 2>: c_2 = {a_1(D), a_1(D), 0, 0}; d_3 = {a_1(D), a_1(D)}; _7 = BIT_FIELD_REF <c_2, 8, 0>; _8 = BIT_FIELD_REF <d_3, 8, 0>; _9 = _7 != _8 ? -1 : 0; _10 = BIT_FIELD_REF <c_2, 8, 8>; _11 = BIT_FIELD_REF <d_3, 8, 8>; _12 = _10 != _11 ? -1 : 0; _13 = BIT_FIELD_REF <c_2, 8, 16>; _14 = BIT_FIELD_REF <d_3, 8, 16>; _15 = _13 != _14 ? -1 : 0; _16 = BIT_FIELD_REF <c_2, 8, 24>; _17 = BIT_FIELD_REF <d_3, 8, 24>; _18 = _16 != _17 ? -1 : 0; _4 = {_9, _12, _15, _18}; which gets done by veclower (so PRE misses its change to clean up the redundancies) from: <bb 2>: c_2 = {a_1(D), a_1(D), 0, 0}; d_3 = {a_1(D), a_1(D)}; _4 = VEC_COND_EXPR <c_2 != d_3, { -1, -1, -1, -1 }, { 0, 0, 0, 0 }>; Which clearly shows we have more than one issue here. I would rather canonicalize the ctors and strenghten definition of gimple to require them to be always full. I suppose we want to eitehr cleanup after veclower or make it bit smarter about duplications. Bootstrapped/regtested x86_64-linux. OK? * fold-const.c (operand_equal_p): Handle matching of vector constructors. * gcc.dg/tree-ssa/operand-equal-2.c: New testcase. Index: testsuite/gcc.dg/tree-ssa/operand-equal-2.c =================================================================== --- testsuite/gcc.dg/tree-ssa/operand-equal-2.c (revision 0) +++ testsuite/gcc.dg/tree-ssa/operand-equal-2.c (revision 0) @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-forwprop1" } */ + +typedef char __attribute__ ((vector_size (4))) v4qi; + +v4qi v; +void ret(char a) +{ + v4qi c={a,a,a,a},d={a,a,a,a}; + v = (c!=d); +} +/* { dg-final { scan-tree-dump "v = . 0, 0, 0, 0 ." "forwprop1"} } */ Index: fold-const.c =================================================================== --- fold-const.c (revision 229153) +++ fold-const.c (working copy) @@ -2809,11 +2809,12 @@ operand_equal_p (const_tree arg0, const_ return 0; } - /* This is needed for conversions and for COMPONENT_REF. - Might as well play it safe and always test this. */ + /* When not checking adddresses, this is needed for conversions and for + COMPONENT_REF. Might as well play it safe and always test this. */ if (TREE_CODE (TREE_TYPE (arg0)) == ERROR_MARK || TREE_CODE (TREE_TYPE (arg1)) == ERROR_MARK - || TYPE_MODE (TREE_TYPE (arg0)) != TYPE_MODE (TREE_TYPE (arg1))) + || (TYPE_MODE (TREE_TYPE (arg0)) != TYPE_MODE (TREE_TYPE (arg1)) + && !(flags & OEP_ADDRESS_OF))) return 0; /* If ARG0 and ARG1 are the same SAVE_EXPR, they are necessarily equal. @@ -3149,6 +3150,56 @@ operand_equal_p (const_tree arg0, const_ && DECL_BUILT_IN_CLASS (arg0) == DECL_BUILT_IN_CLASS (arg1) && DECL_FUNCTION_CODE (arg0) == DECL_FUNCTION_CODE (arg1)); + case tcc_exceptional: + if (TREE_CODE (arg0) == CONSTRUCTOR) + { + /* In GIMPLE constructors are used only to build vectors from + elements. Individual elements in the constructor must be + indexed in increasing order and form an initial sequence. + + We make no effort to compare constructors in generic. + (see sem_variable::equals in ipa-icf which can do so for + constants). */ + if (!VECTOR_TYPE_P (TREE_TYPE (arg0)) + || !VECTOR_TYPE_P (TREE_TYPE (arg1))) + return 0; + + /* Be sure that vectors constructed have the same representation. + We only tested element precision and modes to match. + Vectors may be BLKmode and thus also check that the number of + parts match. */ + if (TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)) + != TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg1))) + return 0; + + vec<constructor_elt, va_gc> *v0 = CONSTRUCTOR_ELTS (arg0); + vec<constructor_elt, va_gc> *v1 = CONSTRUCTOR_ELTS (arg1); + unsigned int len = vec_safe_length (v0); + + if (len != vec_safe_length (v1)) + return 0; + + for (unsigned int i = 0; i < len; i++) + { + constructor_elt *c0 = &(*v0)[i]; + constructor_elt *c1 = &(*v1)[i]; + + if (!operand_equal_p (c0->value, c1->value, flags) + /* In GIMPLE the indexes can be either NULL or matching i. + Double check this so we won't get false + positives for GENERIC. */ + || (c0->index + && (TREE_CODE (c0->index) != INTEGER_CST + || !compare_tree_int (c0->index, i))) + || (c1->index + && (TREE_CODE (c1->index) != INTEGER_CST + || !compare_tree_int (c1->index, i)))) + return 0; + } + return 1; + } + return 0; + default: return 0; }