Hello,here is a patch that turns {v[1],v[0]} into vec_perm_expr(v,v,{1,0}) if the target is ok with it.
I am attaching 2 versions of the patch. p-good is the one that passes testing. p-bad, where I rely on fold_stmt to detect identity permutations, ICEs towards the end of the pass while checking a bogus gimple stmt (one that gimple_debug_stmt crashes on if I call it in gdb). From a performance point of view, p-good makes sense, but I liked the simplicity of p-bad and I am confused as to why it fails.
2012-09-11 Marc Glisse <marc.gli...@inria.fr> gcc/ * tree-ssa-forwprop.c (simplify_vector_constructor): New function. (ssa_forward_propagate_and_combine): Call it. gcc/testsuite/ * gcc.dg/tree-ssa/forwprop-22.c: New testcase. -- Marc Glisse
Index: Makefile.in =================================================================== --- Makefile.in (revision 191173) +++ Makefile.in (working copy) @@ -2237,21 +2237,22 @@ tree-outof-ssa.o : tree-outof-ssa.c $(TR $(TREE_H) $(DIAGNOSTIC_H) $(TM_H) coretypes.h dumpfile.h \ $(TREE_SSA_LIVE_H) $(BASIC_BLOCK_H) $(BITMAP_H) $(GGC_H) \ $(EXPR_H) $(SSAEXPAND_H) $(GIMPLE_PRETTY_PRINT_H) tree-ssa-dse.o : tree-ssa-dse.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ $(TM_H) $(GGC_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) \ $(TREE_FLOW_H) $(TREE_PASS_H) domwalk.h $(FLAGS_H) \ $(GIMPLE_PRETTY_PRINT_H) langhooks.h tree-ssa-forwprop.o : tree-ssa-forwprop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ $(TM_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) $(CFGLOOP_H) \ $(TREE_FLOW_H) $(TREE_PASS_H) $(DIAGNOSTIC_H) \ - langhooks.h $(FLAGS_H) $(GIMPLE_H) $(GIMPLE_PRETTY_PRINT_H) $(EXPR_H) + langhooks.h $(FLAGS_H) $(GIMPLE_H) $(GIMPLE_PRETTY_PRINT_H) $(EXPR_H) \ + $(TREE_VECTORIZER_H) tree-ssa-phiprop.o : tree-ssa-phiprop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ $(TM_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) \ $(TREE_FLOW_H) $(TREE_PASS_H) $(DIAGNOSTIC_H) \ langhooks.h $(FLAGS_H) $(GIMPLE_PRETTY_PRINT_H) tree-ssa-ifcombine.o : tree-ssa-ifcombine.c $(CONFIG_H) $(SYSTEM_H) \ coretypes.h $(TM_H) $(TREE_H) $(BASIC_BLOCK_H) \ $(TREE_FLOW_H) $(TREE_PASS_H) $(DIAGNOSTIC_H) \ $(TREE_PRETTY_PRINT_H) tree-ssa-phiopt.o : tree-ssa-phiopt.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ $(TM_H) $(GGC_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) \ Index: testsuite/gcc.dg/tree-ssa/forwprop-22.c =================================================================== --- testsuite/gcc.dg/tree-ssa/forwprop-22.c (revision 0) +++ testsuite/gcc.dg/tree-ssa/forwprop-22.c (revision 0) @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_double } */ +/* { dg-require-effective-target vect_perm } */ +/* { dg-options "-O -fdump-tree-optimized" } */ + +typedef double vec __attribute__((vector_size (2 * sizeof (double)))); +void f (vec *px, vec *y, vec *z) +{ + vec x = *px; + vec t1 = { x[1], x[0] }; + vec t2 = { x[0], x[1] }; + *y = t1; + *z = t2; +} + +/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-not "BIT_FIELD_REF" "optimized" } } */ +/* { dg-final { cleanup-tree-dump "optimized" } } */ Property changes on: testsuite/gcc.dg/tree-ssa/forwprop-22.c ___________________________________________________________________ Added: svn:keywords + Author Date Id Revision URL Added: svn:eol-style + native Index: tree-ssa-forwprop.c =================================================================== --- tree-ssa-forwprop.c (revision 191173) +++ tree-ssa-forwprop.c (working copy) @@ -26,20 +26,21 @@ along with GCC; see the file COPYING3. #include "tm_p.h" #include "basic-block.h" #include "gimple-pretty-print.h" #include "tree-flow.h" #include "tree-pass.h" #include "langhooks.h" #include "flags.h" #include "gimple.h" #include "expr.h" #include "cfgloop.h" +#include "tree-vectorizer.h" /* This pass propagates the RHS of assignment statements into use sites of the LHS of the assignment. It's basically a specialized form of tree combination. It is hoped all of this can disappear when we have a generalized tree combiner. One class of common cases we handle is forward propagating a single use variable into a COND_EXPR. bb0: @@ -2787,20 +2788,105 @@ simplify_permutation (gimple_stmt_iterat if (TREE_CODE (op0) == SSA_NAME) ret = remove_prop_source_from_use (op0); if (op0 != op1 && TREE_CODE (op1) == SSA_NAME) ret |= remove_prop_source_from_use (op1); return ret ? 2 : 1; } return 0; } +/* Recognize a VEC_PERM_EXPR. Returns true if there were any changes. */ + +static bool +simplify_vector_constructor (gimple_stmt_iterator *gsi) +{ + gimple stmt = gsi_stmt (*gsi); + gimple def_stmt; + tree op, op2, orig, type, elem_type; + unsigned elem_size, nelts, i; + enum tree_code code; + constructor_elt *elt; + unsigned char *sel; + bool maybe_ident; + + gcc_checking_assert (gimple_assign_rhs_code (stmt) == CONSTRUCTOR); + + op = gimple_assign_rhs1 (stmt); + type = TREE_TYPE (op); + gcc_checking_assert (TREE_CODE (type) == VECTOR_TYPE); + + nelts = TYPE_VECTOR_SUBPARTS (type); + elem_type = TREE_TYPE (type); + elem_size = TREE_INT_CST_LOW (TYPE_SIZE (elem_type)); + + sel = XALLOCAVEC (unsigned char, nelts); + orig = NULL; + maybe_ident = true; + FOR_EACH_VEC_ELT (constructor_elt, CONSTRUCTOR_ELTS (op), i, elt) + { + tree ref, op1; + + if (i >= nelts) + return false; + + if (TREE_CODE (elt->value) != SSA_NAME) + return false; + def_stmt = SSA_NAME_DEF_STMT (elt->value); + if (!def_stmt || !is_gimple_assign (def_stmt)) + return false; + code = gimple_assign_rhs_code (def_stmt); + if (code != BIT_FIELD_REF) + return false; + op1 = gimple_assign_rhs1 (def_stmt); + ref = TREE_OPERAND (op1, 0); + if (orig) + { + if (ref != orig) + return false; + } + else + { + if (TREE_CODE (ref) != SSA_NAME) + return false; + orig = ref; + } + if (TREE_INT_CST_LOW (TREE_OPERAND (op1, 1)) != elem_size) + return false; + sel[i] = TREE_INT_CST_LOW (TREE_OPERAND (op1, 2)) / elem_size; + if (sel[i] != i) maybe_ident = false; + } + if (i < nelts) + return false; + + if (maybe_ident) + { + gimple_assign_set_rhs1 (stmt, unshare_expr (orig)); + gimple_set_num_ops (stmt, 2); + gimple_assign_set_rhs_code (stmt, TREE_CODE (orig)); + update_stmt (stmt); + return true; + } + + op2 = vect_gen_perm_mask (type, sel); + if (!op2) + return false; + orig = unshare_expr (orig); + gimple_assign_set_rhs_code (stmt, VEC_PERM_EXPR); + gimple_set_num_ops (stmt, 4); + gimple_assign_set_rhs1 (stmt, orig); + gimple_assign_set_rhs2 (stmt, orig); + gimple_assign_set_rhs3 (stmt, op2); + update_stmt (stmt); + return true; +} + /* Main entry point for the forward propagation and statement combine optimizer. */ static unsigned int ssa_forward_propagate_and_combine (void) { basic_block bb; unsigned int todoflags = 0; cfg_changed = false; @@ -2958,20 +3044,23 @@ ssa_forward_propagate_and_combine (void) } else if (code == VEC_PERM_EXPR) { int did_something = simplify_permutation (&gsi); if (did_something == 2) cfg_changed = true; changed = did_something != 0; } else if (code == BIT_FIELD_REF) changed = simplify_bitfield_ref (&gsi); + else if (code == CONSTRUCTOR + && TREE_CODE (TREE_TYPE (rhs1)) == VECTOR_TYPE) + changed = simplify_vector_constructor (&gsi); break; } case GIMPLE_SWITCH: changed = simplify_gimple_switch (stmt); break; case GIMPLE_COND: { int did_something;
Index: Makefile.in =================================================================== --- Makefile.in (revision 191173) +++ Makefile.in (working copy) @@ -2237,21 +2237,22 @@ tree-outof-ssa.o : tree-outof-ssa.c $(TR $(TREE_H) $(DIAGNOSTIC_H) $(TM_H) coretypes.h dumpfile.h \ $(TREE_SSA_LIVE_H) $(BASIC_BLOCK_H) $(BITMAP_H) $(GGC_H) \ $(EXPR_H) $(SSAEXPAND_H) $(GIMPLE_PRETTY_PRINT_H) tree-ssa-dse.o : tree-ssa-dse.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ $(TM_H) $(GGC_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) \ $(TREE_FLOW_H) $(TREE_PASS_H) domwalk.h $(FLAGS_H) \ $(GIMPLE_PRETTY_PRINT_H) langhooks.h tree-ssa-forwprop.o : tree-ssa-forwprop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ $(TM_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) $(CFGLOOP_H) \ $(TREE_FLOW_H) $(TREE_PASS_H) $(DIAGNOSTIC_H) \ - langhooks.h $(FLAGS_H) $(GIMPLE_H) $(GIMPLE_PRETTY_PRINT_H) $(EXPR_H) + langhooks.h $(FLAGS_H) $(GIMPLE_H) $(GIMPLE_PRETTY_PRINT_H) $(EXPR_H) \ + $(TREE_VECTORIZER_H) tree-ssa-phiprop.o : tree-ssa-phiprop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ $(TM_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) \ $(TREE_FLOW_H) $(TREE_PASS_H) $(DIAGNOSTIC_H) \ langhooks.h $(FLAGS_H) $(GIMPLE_PRETTY_PRINT_H) tree-ssa-ifcombine.o : tree-ssa-ifcombine.c $(CONFIG_H) $(SYSTEM_H) \ coretypes.h $(TM_H) $(TREE_H) $(BASIC_BLOCK_H) \ $(TREE_FLOW_H) $(TREE_PASS_H) $(DIAGNOSTIC_H) \ $(TREE_PRETTY_PRINT_H) tree-ssa-phiopt.o : tree-ssa-phiopt.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ $(TM_H) $(GGC_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) \ Index: testsuite/gcc.dg/tree-ssa/forwprop-22.c =================================================================== --- testsuite/gcc.dg/tree-ssa/forwprop-22.c (revision 0) +++ testsuite/gcc.dg/tree-ssa/forwprop-22.c (revision 0) @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_double } */ +/* { dg-require-effective-target vect_perm } */ +/* { dg-options "-O -fdump-tree-optimized" } */ + +typedef double vec __attribute__((vector_size (2 * sizeof (double)))); +void f (vec *px, vec *y, vec *z) +{ + vec x = *px; + vec t1 = { x[1], x[0] }; + vec t2 = { x[0], x[1] }; + *y = t1; + *z = t2; +} + +/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-not "BIT_FIELD_REF" "optimized" } } */ +/* { dg-final { cleanup-tree-dump "optimized" } } */ Property changes on: testsuite/gcc.dg/tree-ssa/forwprop-22.c ___________________________________________________________________ Added: svn:eol-style + native Added: svn:keywords + Author Date Id Revision URL Index: tree-ssa-forwprop.c =================================================================== --- tree-ssa-forwprop.c (revision 191173) +++ tree-ssa-forwprop.c (working copy) @@ -26,20 +26,21 @@ along with GCC; see the file COPYING3. #include "tm_p.h" #include "basic-block.h" #include "gimple-pretty-print.h" #include "tree-flow.h" #include "tree-pass.h" #include "langhooks.h" #include "flags.h" #include "gimple.h" #include "expr.h" #include "cfgloop.h" +#include "tree-vectorizer.h" /* This pass propagates the RHS of assignment statements into use sites of the LHS of the assignment. It's basically a specialized form of tree combination. It is hoped all of this can disappear when we have a generalized tree combiner. One class of common cases we handle is forward propagating a single use variable into a COND_EXPR. bb0: @@ -2787,20 +2788,94 @@ simplify_permutation (gimple_stmt_iterat if (TREE_CODE (op0) == SSA_NAME) ret = remove_prop_source_from_use (op0); if (op0 != op1 && TREE_CODE (op1) == SSA_NAME) ret |= remove_prop_source_from_use (op1); return ret ? 2 : 1; } return 0; } +/* Recognize a VEC_PERM_EXPR. Returns true if there were any changes. */ + +static bool +simplify_vector_constructor (gimple_stmt_iterator *gsi) +{ + gimple stmt = gsi_stmt (*gsi); + gimple def_stmt; + tree op, op2, orig, type, elem_type; + unsigned elem_size, nelts, i; + enum tree_code code; + constructor_elt *elt; + unsigned char *sel; + + gcc_checking_assert (gimple_assign_rhs_code (stmt) == CONSTRUCTOR); + + op = gimple_assign_rhs1 (stmt); + type = TREE_TYPE (op); + gcc_checking_assert (TREE_CODE (type) == VECTOR_TYPE); + + nelts = TYPE_VECTOR_SUBPARTS (type); + elem_type = TREE_TYPE (type); + elem_size = TREE_INT_CST_LOW (TYPE_SIZE (elem_type)); + + sel = XALLOCAVEC (unsigned char, nelts); + orig = NULL; + FOR_EACH_VEC_ELT (constructor_elt, CONSTRUCTOR_ELTS (op), i, elt) + { + tree ref, op1; + + if (i >= nelts) + return false; + + if (TREE_CODE (elt->value) != SSA_NAME) + return false; + def_stmt = SSA_NAME_DEF_STMT (elt->value); + if (!def_stmt || !is_gimple_assign (def_stmt)) + return false; + code = gimple_assign_rhs_code (def_stmt); + if (code != BIT_FIELD_REF) + return false; + op1 = gimple_assign_rhs1 (def_stmt); + ref = TREE_OPERAND (op1, 0); + if (orig) + { + if (ref != orig) + return false; + } + else + { + if (TREE_CODE (ref) != SSA_NAME) + return false; + orig = ref; + } + if (TREE_INT_CST_LOW (TREE_OPERAND (op1, 1)) != elem_size) + return false; + sel[i] = TREE_INT_CST_LOW (TREE_OPERAND (op1, 2)) / elem_size; + } + if (i < nelts) + return false; + + op2 = vect_gen_perm_mask (type, sel); + if (!op2) + return false; + orig = unshare_expr (orig); + gimple_assign_set_rhs_code (stmt, VEC_PERM_EXPR); + gimple_set_num_ops (stmt, 4); + gimple_assign_set_rhs1 (stmt, orig); + gimple_assign_set_rhs2 (stmt, orig); + gimple_assign_set_rhs3 (stmt, op2); + fold_stmt (gsi); + update_stmt (gsi_stmt (*gsi)); + return true; +} + /* Main entry point for the forward propagation and statement combine optimizer. */ static unsigned int ssa_forward_propagate_and_combine (void) { basic_block bb; unsigned int todoflags = 0; cfg_changed = false; @@ -2958,20 +3033,23 @@ ssa_forward_propagate_and_combine (void) } else if (code == VEC_PERM_EXPR) { int did_something = simplify_permutation (&gsi); if (did_something == 2) cfg_changed = true; changed = did_something != 0; } else if (code == BIT_FIELD_REF) changed = simplify_bitfield_ref (&gsi); + else if (code == CONSTRUCTOR + && TREE_CODE (TREE_TYPE (rhs1)) == VECTOR_TYPE) + changed = simplify_vector_constructor (&gsi); break; } case GIMPLE_SWITCH: changed = simplify_gimple_switch (stmt); break; case GIMPLE_COND: { int did_something;