Hi,
This patch fixes the inefficient code generated by vectorizer as reported by 
PR69848.
It introduces new conditional reduction type CONST_COND_REDUCTION.  As a result,
we don't need to compute index vector in loop; also the epilog reduction code 
can be 
simplified using single reduc_max/reduc_min operation.  Together with AArch64 
vcond 
patches, the # of insns in loop body is reduced from 10 to 4 on AArch64.  Note, 
this one
doesn't handle cases in which reduction values are loop invariants because it 
requires 
quite different code to current implementation, and I failed to work out a 
"clean" patch at 
the moment.

Bootstrap and test on x86_64 and AArch64.  Is it OK?

Thanks,
bin

2016-08-08  Bin Cheng  <bin.ch...@arm.com>

        PR tree-optimization/69848
        * tree-vectorizer.h (enum vect_def_type): New condition reduction
        type CONST_COND_REDUCTION.
        * tree-vect-loop.c (vectorizable_reduction): Support new condition
        reudction type CONST_COND_REDUCTION.

gcc/testsuite/ChangeLog
2016-08-08  Bin Cheng  <bin.ch...@arm.com>

        PR tree-optimization/69848
        * gcc.dg/vect/vect-pr69848.c: New test.
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 41c4c29..4957b66 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -5416,7 +5416,7 @@ vectorizable_reduction (gimple *stmt, 
gimple_stmt_iterator *gsi,
   optab optab, reduc_optab;
   tree new_temp = NULL_TREE;
   gimple *def_stmt;
-  enum vect_def_type dt;
+  enum vect_def_type dt, cond_reduc_dt = vect_unknown_def_type;
   gphi *new_phi = NULL;
   tree scalar_type;
   bool is_simple_use;
@@ -5447,7 +5447,7 @@ vectorizable_reduction (gimple *stmt, 
gimple_stmt_iterator *gsi,
   tree def0, def1, tem, op0, op1 = NULL_TREE;
   bool first_p = true;
   tree cr_index_scalar_type = NULL_TREE, cr_index_vector_type = NULL_TREE;
-  gimple *cond_expr_induction_def_stmt = NULL;
+  tree cond_reduc_val = NULL_TREE, const_cond_cmp = NULL_TREE;
 
   /* In case of reduction chain we switch to the first stmt in the chain, but
      we don't update STMT_INFO, since only the last stmt is marked as reduction
@@ -5597,8 +5597,18 @@ vectorizable_reduction (gimple *stmt, 
gimple_stmt_iterator *gsi,
           reduc_index = i;
         }
 
-      if (i == 1 && code == COND_EXPR && dt == vect_induction_def)
-       cond_expr_induction_def_stmt = def_stmt;
+      if (i == 1 && code == COND_EXPR)
+       {
+         /* Record how value of COND_EXPR is defined.  */
+         if (dt == vect_constant_def)
+           {
+             cond_reduc_dt = dt;
+             cond_reduc_val = ops[i];
+           }
+         if (dt == vect_induction_def && def_stmt != NULL
+             && is_nonwrapping_integer_induction (def_stmt, loop))
+           cond_reduc_dt = dt;
+       }
     }
 
   is_simple_use = vect_is_simple_use (ops[reduc_index], loop_vinfo,
@@ -5630,18 +5640,49 @@ vectorizable_reduction (gimple *stmt, 
gimple_stmt_iterator *gsi,
                                          !nested_cycle, &dummy, false,
                                          &v_reduc_type);
 
+  STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) = v_reduc_type;
   /* If we have a condition reduction, see if we can simplify it further.  */
-  if (v_reduc_type == COND_REDUCTION
-      && cond_expr_induction_def_stmt != NULL
-      && is_nonwrapping_integer_induction (cond_expr_induction_def_stmt, loop))
+  if (v_reduc_type == COND_REDUCTION)
     {
-      if (dump_enabled_p ())
-       dump_printf_loc (MSG_NOTE, vect_location,
-                        "condition expression based on integer induction.\n");
-      STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) = INTEGER_INDUC_COND_REDUCTION;
+      if (cond_reduc_dt == vect_induction_def)
+       {
+         if (dump_enabled_p ())
+           dump_printf_loc (MSG_NOTE, vect_location,
+                            "condition expression based on "
+                            "integer induction.\n");
+         STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
+           = INTEGER_INDUC_COND_REDUCTION;
+       }
+
+      if (cond_reduc_dt == vect_constant_def)
+       {
+         enum vect_def_type cond_initial_dt;
+         gimple *def_stmt = SSA_NAME_DEF_STMT (ops[reduc_index]);
+         tree cond_initial_val
+           = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
+
+         gcc_assert (cond_reduc_val != NULL_TREE);
+         vect_is_simple_use (cond_initial_val, loop_vinfo,
+                             &def_stmt, &cond_initial_dt);
+         if (cond_initial_dt == vect_constant_def
+             && types_compatible_p (TREE_TYPE (cond_initial_val),
+                                    TREE_TYPE (cond_reduc_val)))
+           {
+             tree e = fold_build2 (LE_EXPR, boolean_type_node,
+                                   cond_initial_val, cond_reduc_val);
+             if (e && (integer_onep (e) || integer_zerop (e)))
+               {
+                 if (dump_enabled_p ())
+                   dump_printf_loc (MSG_NOTE, vect_location,
+                                    "condition expression based on "
+                                    "compile time constant.\n");
+                 const_cond_cmp = e;
+                 STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
+                   = CONST_COND_REDUCTION;
+               }
+           }
+       }
     }
-  else
-   STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) = v_reduc_type;
 
   if (orig_stmt)
     gcc_assert (tmp == orig_stmt
@@ -5787,8 +5828,15 @@ vectorizable_reduction (gimple *stmt, 
gimple_stmt_iterator *gsi,
 
       /* For simple condition reductions, replace with the actual expression
         we want to base our reduction around.  */
-      if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
-         == INTEGER_INDUC_COND_REDUCTION)
+      if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == CONST_COND_REDUCTION)
+       {
+         gcc_assert (const_cond_cmp != NULL_TREE);
+         gcc_assert (integer_onep (const_cond_cmp)
+                     || integer_zerop (const_cond_cmp));
+         orig_code = integer_onep (const_cond_cmp) ? MAX_EXPR : MIN_EXPR;
+       }
+      else if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
+                == INTEGER_INDUC_COND_REDUCTION)
        orig_code = MAX_EXPR;
     }
 
@@ -5810,9 +5858,7 @@ vectorizable_reduction (gimple *stmt, 
gimple_stmt_iterator *gsi,
 
   epilog_reduc_code = ERROR_MARK;
 
-  if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == TREE_CODE_REDUCTION
-      || STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
-               == INTEGER_INDUC_COND_REDUCTION)
+  if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) != COND_REDUCTION)
     {
       if (reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
        {
@@ -5839,8 +5885,10 @@ vectorizable_reduction (gimple *stmt, 
gimple_stmt_iterator *gsi,
             generated in the epilog using multiple expressions.  This does not
             work for condition reductions.  */
          if (epilog_reduc_code == ERROR_MARK
-             && STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
-                       == INTEGER_INDUC_COND_REDUCTION)
+             && (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
+                       == INTEGER_INDUC_COND_REDUCTION
+                 || STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
+                       == CONST_COND_REDUCTION))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -5881,9 +5929,7 @@ vectorizable_reduction (gimple *stmt, 
gimple_stmt_iterator *gsi,
     }
 
   if ((double_reduc
-       || STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION
-       || STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
-               == INTEGER_INDUC_COND_REDUCTION)
+       || STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) != TREE_CODE_REDUCTION)
       && ncopies > 1)
     {
       if (dump_enabled_p ())
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 31570d8..29ef676 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -65,7 +65,8 @@ enum vect_def_type {
 enum vect_reduction_type {
   TREE_CODE_REDUCTION,
   COND_REDUCTION,
-  INTEGER_INDUC_COND_REDUCTION
+  INTEGER_INDUC_COND_REDUCTION,
+  CONST_COND_REDUCTION
 };
 
 #define VECTORIZABLE_CYCLE_DEF(D) (((D) == vect_reduction_def)           \
diff --git a/gcc/testsuite/gcc.dg/vect/vect-pr69848.c 
b/gcc/testsuite/gcc.dg/vect/vect-pr69848.c
new file mode 100644
index 0000000..7c6e33b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-pr69848.c
@@ -0,0 +1,37 @@
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 256
+int a[N] = {0};
+
+__attribute__ ((noinline))
+int foo ()
+{
+  int i, res = 0;
+  for (i = 0; i < N; i++)
+  {
+    if (a[i] != 0)
+      res = 1;
+  }
+  return res;
+}
+
+int main (void)
+{
+  int i, res;
+
+  check_vect ();
+
+  if ((res = foo ()) != 0)
+    abort ();
+
+  a[34] = 101;
+  a[85] = 9;
+  if ((res = foo ()) != 1)
+    abort ();
+
+  return 0;
+}
+/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { xfail { ! 
vect_max_reduc } } } } */

Reply via email to