Hello,

Here is a patch that turns a vector constructor such as {v[1],v[0]} into VEC_PERM_EXPR(v,v,{1,0}) when the target supports that permutation.

I am attaching two versions of the patch. p-good (the first one below, which detects identity permutations itself) is the one that passes testing. p-bad (the second one below), where I rely on fold_stmt to detect identity permutations, ICEs towards the end of the pass while checking a bogus gimple stmt (one that debug_gimple_stmt crashes on if I call it in gdb). From a performance point of view, p-good makes sense, but I liked the simplicity of p-bad and I am confused as to why it fails.

2012-09-11  Marc Glisse  <marc.gli...@inria.fr>

gcc/
        * tree-ssa-forwprop.c (simplify_vector_constructor): New function.
        (ssa_forward_propagate_and_combine): Call it.

gcc/testsuite/
        * gcc.dg/tree-ssa/forwprop-22.c: New testcase.

--
Marc Glisse
Index: Makefile.in
===================================================================
--- Makefile.in (revision 191173)
+++ Makefile.in (working copy)
@@ -2237,21 +2237,22 @@ tree-outof-ssa.o : tree-outof-ssa.c $(TR
    $(TREE_H) $(DIAGNOSTIC_H) $(TM_H) coretypes.h dumpfile.h \
    $(TREE_SSA_LIVE_H) $(BASIC_BLOCK_H) $(BITMAP_H) $(GGC_H) \
    $(EXPR_H) $(SSAEXPAND_H) $(GIMPLE_PRETTY_PRINT_H)
 tree-ssa-dse.o : tree-ssa-dse.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
    $(TM_H) $(GGC_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) \
    $(TREE_FLOW_H) $(TREE_PASS_H) domwalk.h $(FLAGS_H) \
    $(GIMPLE_PRETTY_PRINT_H) langhooks.h
 tree-ssa-forwprop.o : tree-ssa-forwprop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
    $(TM_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) $(CFGLOOP_H) \
    $(TREE_FLOW_H) $(TREE_PASS_H) $(DIAGNOSTIC_H) \
-   langhooks.h $(FLAGS_H) $(GIMPLE_H) $(GIMPLE_PRETTY_PRINT_H) $(EXPR_H)
+   langhooks.h $(FLAGS_H) $(GIMPLE_H) $(GIMPLE_PRETTY_PRINT_H) $(EXPR_H) \
+   $(TREE_VECTORIZER_H)
 tree-ssa-phiprop.o : tree-ssa-phiprop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
    $(TM_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) \
    $(TREE_FLOW_H) $(TREE_PASS_H) $(DIAGNOSTIC_H) \
    langhooks.h $(FLAGS_H) $(GIMPLE_PRETTY_PRINT_H)
 tree-ssa-ifcombine.o : tree-ssa-ifcombine.c $(CONFIG_H) $(SYSTEM_H) \
    coretypes.h $(TM_H) $(TREE_H) $(BASIC_BLOCK_H) \
    $(TREE_FLOW_H) $(TREE_PASS_H) $(DIAGNOSTIC_H) \
    $(TREE_PRETTY_PRINT_H)
 tree-ssa-phiopt.o : tree-ssa-phiopt.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
    $(TM_H) $(GGC_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) \
Index: testsuite/gcc.dg/tree-ssa/forwprop-22.c
===================================================================
--- testsuite/gcc.dg/tree-ssa/forwprop-22.c     (revision 0)
+++ testsuite/gcc.dg/tree-ssa/forwprop-22.c     (revision 0)
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_double } */
+/* { dg-require-effective-target vect_perm } */
+/* { dg-options "-O -fdump-tree-optimized" } */
+
+typedef double vec __attribute__((vector_size (2 * sizeof (double))));
+void f (vec *px, vec *y, vec *z)
+{
+  vec x = *px;
+  vec t1 = { x[1], x[0] };
+  vec t2 = { x[0], x[1] };
+  *y = t1;
+  *z = t2;
+}
+
+/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-not "BIT_FIELD_REF" "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */

Property changes on: testsuite/gcc.dg/tree-ssa/forwprop-22.c
___________________________________________________________________
Added: svn:keywords
   + Author Date Id Revision URL
Added: svn:eol-style
   + native

Index: tree-ssa-forwprop.c
===================================================================
--- tree-ssa-forwprop.c (revision 191173)
+++ tree-ssa-forwprop.c (working copy)
@@ -26,20 +26,21 @@ along with GCC; see the file COPYING3.
 #include "tm_p.h"
 #include "basic-block.h"
 #include "gimple-pretty-print.h"
 #include "tree-flow.h"
 #include "tree-pass.h"
 #include "langhooks.h"
 #include "flags.h"
 #include "gimple.h"
 #include "expr.h"
 #include "cfgloop.h"
+#include "tree-vectorizer.h"
 
 /* This pass propagates the RHS of assignment statements into use
    sites of the LHS of the assignment.  It's basically a specialized
    form of tree combination.   It is hoped all of this can disappear
    when we have a generalized tree combiner.
 
    One class of common cases we handle is forward propagating a single use
    variable into a COND_EXPR.
 
      bb0:
@@ -2787,20 +2788,105 @@ simplify_permutation (gimple_stmt_iterat
       if (TREE_CODE (op0) == SSA_NAME)
        ret = remove_prop_source_from_use (op0);
       if (op0 != op1 && TREE_CODE (op1) == SSA_NAME)
        ret |= remove_prop_source_from_use (op1);
       return ret ? 2 : 1;
     }
 
   return 0;
 }
 
+/* Recognize a VEC_PERM_EXPR.  Returns true if there were any changes.  */
+
+static bool
+simplify_vector_constructor (gimple_stmt_iterator *gsi)
+{
+  gimple stmt = gsi_stmt (*gsi);
+  gimple def_stmt;
+  tree op, op2, orig, type, elem_type;
+  unsigned elem_size, nelts, i;
+  enum tree_code code;
+  constructor_elt *elt;
+  unsigned char *sel;
+  bool maybe_ident;
+
+  gcc_checking_assert (gimple_assign_rhs_code (stmt) == CONSTRUCTOR);
+
+  op = gimple_assign_rhs1 (stmt);
+  type = TREE_TYPE (op);
+  gcc_checking_assert (TREE_CODE (type) == VECTOR_TYPE);
+
+  nelts = TYPE_VECTOR_SUBPARTS (type);
+  elem_type = TREE_TYPE (type);
+  elem_size = TREE_INT_CST_LOW (TYPE_SIZE (elem_type));
+
+  sel = XALLOCAVEC (unsigned char, nelts);
+  orig = NULL;
+  maybe_ident = true;
+  FOR_EACH_VEC_ELT (constructor_elt, CONSTRUCTOR_ELTS (op), i, elt)
+    {
+      tree ref, op1;
+
+      if (i >= nelts)
+       return false;
+
+      if (TREE_CODE (elt->value) != SSA_NAME)
+       return false;
+      def_stmt = SSA_NAME_DEF_STMT (elt->value);
+      if (!def_stmt || !is_gimple_assign (def_stmt))
+       return false;
+      code = gimple_assign_rhs_code (def_stmt);
+      if (code != BIT_FIELD_REF)
+       return false;
+      op1 = gimple_assign_rhs1 (def_stmt);
+      ref = TREE_OPERAND (op1, 0);
+      if (orig)
+       {
+         if (ref != orig)
+           return false;
+       }
+      else
+       {
+         if (TREE_CODE (ref) != SSA_NAME)
+           return false;
+         orig = ref;
+       }
+      if (TREE_INT_CST_LOW (TREE_OPERAND (op1, 1)) != elem_size)
+       return false;
+      sel[i] = TREE_INT_CST_LOW (TREE_OPERAND (op1, 2)) / elem_size;
+      if (sel[i] != i) maybe_ident = false;
+    }
+  if (i < nelts)
+    return false;
+
+  if (maybe_ident)
+    {
+      gimple_assign_set_rhs1 (stmt, unshare_expr (orig));
+      gimple_set_num_ops (stmt, 2);
+      gimple_assign_set_rhs_code (stmt, TREE_CODE (orig));
+      update_stmt (stmt);
+      return true;
+    }
+
+  op2 = vect_gen_perm_mask (type, sel);
+  if (!op2)
+    return false;
+  orig = unshare_expr (orig);
+  gimple_assign_set_rhs_code (stmt, VEC_PERM_EXPR);
+  gimple_set_num_ops (stmt, 4);
+  gimple_assign_set_rhs1 (stmt, orig);
+  gimple_assign_set_rhs2 (stmt, orig);
+  gimple_assign_set_rhs3 (stmt, op2);
+  update_stmt (stmt);
+  return true;
+}
+
 /* Main entry point for the forward propagation and statement combine
    optimizer.  */
 
 static unsigned int
 ssa_forward_propagate_and_combine (void)
 {
   basic_block bb;
   unsigned int todoflags = 0;
 
   cfg_changed = false;
@@ -2958,20 +3044,23 @@ ssa_forward_propagate_and_combine (void)
                  }
                else if (code == VEC_PERM_EXPR)
                  {
                    int did_something = simplify_permutation (&gsi);
                    if (did_something == 2)
                      cfg_changed = true;
                    changed = did_something != 0;
                  }
                else if (code == BIT_FIELD_REF)
                  changed = simplify_bitfield_ref (&gsi);
+                else if (code == CONSTRUCTOR
+                         && TREE_CODE (TREE_TYPE (rhs1)) == VECTOR_TYPE)
+                  changed = simplify_vector_constructor (&gsi);
                break;
              }
 
            case GIMPLE_SWITCH:
              changed = simplify_gimple_switch (stmt);
              break;
 
            case GIMPLE_COND:
              {
                int did_something;
Index: Makefile.in
===================================================================
--- Makefile.in (revision 191173)
+++ Makefile.in (working copy)
@@ -2237,21 +2237,22 @@ tree-outof-ssa.o : tree-outof-ssa.c $(TR
    $(TREE_H) $(DIAGNOSTIC_H) $(TM_H) coretypes.h dumpfile.h \
    $(TREE_SSA_LIVE_H) $(BASIC_BLOCK_H) $(BITMAP_H) $(GGC_H) \
    $(EXPR_H) $(SSAEXPAND_H) $(GIMPLE_PRETTY_PRINT_H)
 tree-ssa-dse.o : tree-ssa-dse.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
    $(TM_H) $(GGC_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) \
    $(TREE_FLOW_H) $(TREE_PASS_H) domwalk.h $(FLAGS_H) \
    $(GIMPLE_PRETTY_PRINT_H) langhooks.h
 tree-ssa-forwprop.o : tree-ssa-forwprop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
    $(TM_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) $(CFGLOOP_H) \
    $(TREE_FLOW_H) $(TREE_PASS_H) $(DIAGNOSTIC_H) \
-   langhooks.h $(FLAGS_H) $(GIMPLE_H) $(GIMPLE_PRETTY_PRINT_H) $(EXPR_H)
+   langhooks.h $(FLAGS_H) $(GIMPLE_H) $(GIMPLE_PRETTY_PRINT_H) $(EXPR_H) \
+   $(TREE_VECTORIZER_H)
 tree-ssa-phiprop.o : tree-ssa-phiprop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
    $(TM_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) \
    $(TREE_FLOW_H) $(TREE_PASS_H) $(DIAGNOSTIC_H) \
    langhooks.h $(FLAGS_H) $(GIMPLE_PRETTY_PRINT_H)
 tree-ssa-ifcombine.o : tree-ssa-ifcombine.c $(CONFIG_H) $(SYSTEM_H) \
    coretypes.h $(TM_H) $(TREE_H) $(BASIC_BLOCK_H) \
    $(TREE_FLOW_H) $(TREE_PASS_H) $(DIAGNOSTIC_H) \
    $(TREE_PRETTY_PRINT_H)
 tree-ssa-phiopt.o : tree-ssa-phiopt.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
    $(TM_H) $(GGC_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) \
Index: testsuite/gcc.dg/tree-ssa/forwprop-22.c
===================================================================
--- testsuite/gcc.dg/tree-ssa/forwprop-22.c     (revision 0)
+++ testsuite/gcc.dg/tree-ssa/forwprop-22.c     (revision 0)
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_double } */
+/* { dg-require-effective-target vect_perm } */
+/* { dg-options "-O -fdump-tree-optimized" } */
+
+typedef double vec __attribute__((vector_size (2 * sizeof (double))));
+void f (vec *px, vec *y, vec *z)
+{
+  vec x = *px;
+  vec t1 = { x[1], x[0] };
+  vec t2 = { x[0], x[1] };
+  *y = t1;
+  *z = t2;
+}
+
+/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-not "BIT_FIELD_REF" "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */

Property changes on: testsuite/gcc.dg/tree-ssa/forwprop-22.c
___________________________________________________________________
Added: svn:eol-style
   + native
Added: svn:keywords
   + Author Date Id Revision URL

Index: tree-ssa-forwprop.c
===================================================================
--- tree-ssa-forwprop.c (revision 191173)
+++ tree-ssa-forwprop.c (working copy)
@@ -26,20 +26,21 @@ along with GCC; see the file COPYING3.
 #include "tm_p.h"
 #include "basic-block.h"
 #include "gimple-pretty-print.h"
 #include "tree-flow.h"
 #include "tree-pass.h"
 #include "langhooks.h"
 #include "flags.h"
 #include "gimple.h"
 #include "expr.h"
 #include "cfgloop.h"
+#include "tree-vectorizer.h"
 
 /* This pass propagates the RHS of assignment statements into use
    sites of the LHS of the assignment.  It's basically a specialized
    form of tree combination.   It is hoped all of this can disappear
    when we have a generalized tree combiner.
 
    One class of common cases we handle is forward propagating a single use
    variable into a COND_EXPR.
 
      bb0:
@@ -2787,20 +2788,94 @@ simplify_permutation (gimple_stmt_iterat
       if (TREE_CODE (op0) == SSA_NAME)
        ret = remove_prop_source_from_use (op0);
       if (op0 != op1 && TREE_CODE (op1) == SSA_NAME)
        ret |= remove_prop_source_from_use (op1);
       return ret ? 2 : 1;
     }
 
   return 0;
 }
 
+/* Recognize a VEC_PERM_EXPR.  Returns true if there were any changes.  */
+
+static bool
+simplify_vector_constructor (gimple_stmt_iterator *gsi)
+{
+  gimple stmt = gsi_stmt (*gsi);
+  gimple def_stmt;
+  tree op, op2, orig, type, elem_type;
+  unsigned elem_size, nelts, i;
+  enum tree_code code;
+  constructor_elt *elt;
+  unsigned char *sel;
+
+  gcc_checking_assert (gimple_assign_rhs_code (stmt) == CONSTRUCTOR);
+
+  op = gimple_assign_rhs1 (stmt);
+  type = TREE_TYPE (op);
+  gcc_checking_assert (TREE_CODE (type) == VECTOR_TYPE);
+
+  nelts = TYPE_VECTOR_SUBPARTS (type);
+  elem_type = TREE_TYPE (type);
+  elem_size = TREE_INT_CST_LOW (TYPE_SIZE (elem_type));
+
+  sel = XALLOCAVEC (unsigned char, nelts);
+  orig = NULL;
+  FOR_EACH_VEC_ELT (constructor_elt, CONSTRUCTOR_ELTS (op), i, elt)
+    {
+      tree ref, op1;
+
+      if (i >= nelts)
+       return false;
+
+      if (TREE_CODE (elt->value) != SSA_NAME)
+       return false;
+      def_stmt = SSA_NAME_DEF_STMT (elt->value);
+      if (!def_stmt || !is_gimple_assign (def_stmt))
+       return false;
+      code = gimple_assign_rhs_code (def_stmt);
+      if (code != BIT_FIELD_REF)
+       return false;
+      op1 = gimple_assign_rhs1 (def_stmt);
+      ref = TREE_OPERAND (op1, 0);
+      if (orig)
+       {
+         if (ref != orig)
+           return false;
+       }
+      else
+       {
+         if (TREE_CODE (ref) != SSA_NAME)
+           return false;
+         orig = ref;
+       }
+      if (TREE_INT_CST_LOW (TREE_OPERAND (op1, 1)) != elem_size)
+       return false;
+      sel[i] = TREE_INT_CST_LOW (TREE_OPERAND (op1, 2)) / elem_size;
+    }
+  if (i < nelts)
+    return false;
+
+  op2 = vect_gen_perm_mask (type, sel);
+  if (!op2)
+    return false;
+  orig = unshare_expr (orig);
+  gimple_assign_set_rhs_code (stmt, VEC_PERM_EXPR);
+  gimple_set_num_ops (stmt, 4);
+  gimple_assign_set_rhs1 (stmt, orig);
+  gimple_assign_set_rhs2 (stmt, orig);
+  gimple_assign_set_rhs3 (stmt, op2);
+  fold_stmt (gsi);
+  update_stmt (gsi_stmt (*gsi));
+  return true;
+}
+
 /* Main entry point for the forward propagation and statement combine
    optimizer.  */
 
 static unsigned int
 ssa_forward_propagate_and_combine (void)
 {
   basic_block bb;
   unsigned int todoflags = 0;
 
   cfg_changed = false;
@@ -2958,20 +3033,23 @@ ssa_forward_propagate_and_combine (void)
                  }
                else if (code == VEC_PERM_EXPR)
                  {
                    int did_something = simplify_permutation (&gsi);
                    if (did_something == 2)
                      cfg_changed = true;
                    changed = did_something != 0;
                  }
                else if (code == BIT_FIELD_REF)
                  changed = simplify_bitfield_ref (&gsi);
+                else if (code == CONSTRUCTOR
+                         && TREE_CODE (TREE_TYPE (rhs1)) == VECTOR_TYPE)
+                  changed = simplify_vector_constructor (&gsi);
                break;
              }
 
            case GIMPLE_SWITCH:
              changed = simplify_gimple_switch (stmt);
              break;
 
            case GIMPLE_COND:
              {
                int did_something;

Reply via email to