Hi all,

Just looking for some feedback on the approach.

Currently the loop vectorizer can't vectorize the following typical loop 
for getting max value and index from an array:

void test_vec(int *data, int n) {
         int best_i, best = 0;

         for (int i = 0; i < n; i++) {
                 if (data[i] > best) {
                         best = data[i];
                         best_i = i;
                 }
         }

         data[best_i] = data[0];
         data[0] = best;
}

This patch adds some support for this pattern.

This patch addresses Bug 88259.

Regression testing is still in progress,
This patch does not work correctly with vect-epilogues-nomask, and the 
reason for that is still being investigated.

gcc/ChangeLog:


2019-11-15  Joel Hutton  <joel.hut...@arm.com>
         Tamar Christina  <tamar.christ...@arm.com>

     PR tree-optimization/88259
     * tree-vect-loop.c (vect_reassociating_reduction_simple_p): New 
function.
     (vect_recog_minmax_index_pattern): New function.
     (vect_is_simple_reduction): Add check for minmax pattern.
     (vect_model_reduction_cost): Add case for minmax pattern.
     (vect_create_epilog_for_reduction): Add fixup for minmax epilog.
     * tree-vectorizer.h (enum vect_reduction_type): Add 
INDEX_MINMAX_REDUCTION reduction type.
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index b600d3157457c3180d0456c4f66cbc57012e3c71..dc97dea38a504e8f9391e6d138aad0a2e3872b50 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -387,6 +387,83 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
   return opt_result::success ();
 }
 
+static bool
+vect_reassociating_reduction_simple_p (stmt_vec_info stmt_info, tree_code code,
+				       tree *op0_out, tree *op1_out)
+{
+  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
+  if (!loop_info)
+    return false;
+
+  gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
+  if (!assign || gimple_assign_rhs_code (assign) != code)
+    return false;
+
+  /* We don't allow changing the order of the computation in the inner-loop
+     when doing outer-loop vectorization.  */
+  class loop *loop = LOOP_VINFO_LOOP (loop_info);
+  if (loop && nested_in_vect_loop_p (loop, stmt_info))
+    return false;
+
+  *op0_out = gimple_assign_rhs1 (assign);
+  *op1_out = gimple_assign_rhs2 (assign);
+  return true;
+}
+
+
+static bool
+vect_recog_minmax_index_pattern (stmt_vec_info stmt_vinfo, loop_vec_info loop_info)
+{
+  tree oprnd0, oprnd1;
+  gimple *last_stmt = stmt_vinfo->stmt;
+  vec_info *vinfo = stmt_vinfo->vinfo;
+  gimple *use_stmt;
+  use_operand_p use_p;
+  imm_use_iterator iter;
+
+  /* Starting from LAST_STMT, follow the defs of its uses in search
+     of the above pattern.  */
+
+  if (!vect_reassociating_reduction_simple_p (stmt_vinfo, MAX_EXPR,
+					      &oprnd0, &oprnd1))
+    return NULL;
+
+  if (!is_a <gphi *> (SSA_NAME_DEF_STMT (oprnd1)))
+    return NULL;
+
+  stmt_vec_info phy_vinfo = vinfo->lookup_def (oprnd1);
+  if (!phy_vinfo)
+    return NULL;
+
+  basic_block top_bb = gimple_bb (last_stmt);
+
+  FOR_EACH_IMM_USE_FAST (use_p, iter, oprnd1)
+    {
+      use_stmt = USE_STMT (use_p);
+      if (is_gimple_assign (use_stmt)
+	  && gimple_assign_rhs_code (use_stmt) == COND_EXPR)
+	{
+	  basic_block bb = gimple_bb (use_stmt);
+
+	  if (bb == top_bb
+	      && gimple_uid (use_stmt) < gimple_uid (last_stmt))
+	    {
+	      tree cond = gimple_assign_rhs1 (use_stmt);
+	      if (TREE_CODE (cond) != LE_EXPR)
+		continue;
+
+	      stmt_vec_info ind_stmt = loop_info->lookup_stmt (use_stmt);
+	      ind_stmt->reduc_related_stmt = stmt_vinfo;
+	      stmt_vinfo->reduc_related_stmt = ind_stmt;
+	      return true;
+	    }
+	}
+    }
+
+  return false;
+}
+
+
 
 /* Function vect_is_simple_iv_evolution.
 
@@ -2771,20 +2848,6 @@ pop:
 	  fail = true;
 	  break;
 	}
-      /* Check there's only a single stmt the op is used on inside
-         of the loop.  */
-      imm_use_iterator imm_iter;
-      gimple *op_use_stmt;
-      unsigned cnt = 0;
-      FOR_EACH_IMM_USE_STMT (op_use_stmt, imm_iter, op)
-	if (!is_gimple_debug (op_use_stmt)
-	    && flow_bb_inside_loop_p (loop, gimple_bb (op_use_stmt)))
-	  cnt++;
-      if (cnt != 1)
-	{
-	  fail = true;
-	  break;
-	}
       tree_code use_code = gimple_assign_rhs_code (use_stmt);
       if (use_code == MINUS_EXPR)
 	{
@@ -2963,10 +3026,22 @@ vect_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info,
       return def_stmt_info;
     }
 
+  /* Detect if this is a reduction that we have special code to handle the use
+     of the reduction multiple times.  */
+  bool supported_multi_use_reduction
+    = vect_recog_minmax_index_pattern (def_stmt_info, loop_info);
+
+  /* Update the reduction type for multi use reduction.  */
+  if (supported_multi_use_reduction)
+    STMT_VINFO_REDUC_TYPE (phi_info) = INDEX_MINMAX_REDUCTION;
+
+  def_stmt_info->reduc_phi_node = phi_info;
+
   /* If this isn't a nested cycle or if the nested cycle reduction value
      is used ouside of the inner loop we cannot handle uses of the reduction
      value.  */
-  if (nlatch_def_loop_uses > 1 || nphi_def_loop_uses > 1)
+  if (!supported_multi_use_reduction
+      && (nlatch_def_loop_uses > 1 || nphi_def_loop_uses > 1))
     {
       if (dump_enabled_p ())
 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -3737,7 +3812,8 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, internal_fn reduc_fn,
     {
       if (reduc_fn != IFN_LAST)
 	{
-	  if (reduction_type == COND_REDUCTION)
+	  if (reduction_type == COND_REDUCTION
+	      || reduction_type == INDEX_MINMAX_REDUCTION)
 	    {
 	      /* An EQ stmt and an COND_EXPR stmt.  */
 	      epilogue_cost += record_stmt_cost (cost_vec, 2,
@@ -4764,6 +4840,14 @@ vect_create_epilog_for_reduction (stmt_vec_info stmt_info,
 
       new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
       gimple_set_lhs (epilog_stmt, new_temp);
+
+      if (stmt_info->reduc_related_stmt
+	  && stmt_info->reduc_related_stmt->reduc_phi_node
+	  && stmt_info->reduc_related_stmt->reduc_phi_node->reduc_type == INDEX_MINMAX_REDUCTION)
+      {
+	stmt_info->reduc_minmax_epilog_stmt = epilog_stmt;
+      }
+
       gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
 
       if ((STMT_VINFO_REDUC_TYPE (reduc_info) == INTEGER_INDUC_COND_REDUCTION)
@@ -5008,6 +5092,7 @@ vect_create_epilog_for_reduction (stmt_vec_info stmt_info,
 
 	  gimple_seq stmts = NULL;
 	  new_temp = gimple_convert (&stmts, vectype1, new_temp);
+	  tree maxresultvec = new_temp;
           for (elt_offset = nelements / 2;
                elt_offset >= 1;
                elt_offset /= 2)
@@ -5034,7 +5119,54 @@ vect_create_epilog_for_reduction (stmt_vec_info stmt_info,
 	  epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
 	  new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
 	  gimple_assign_set_lhs (epilog_stmt, new_temp);
+
+	  stmt_info->reduc_minmax_scalar_result = epilog_stmt;
 	  gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
+
+	  /* Fixup the minmax epilog.  */
+	  if (stmt_info->reduc_related_stmt
+	      && stmt_info->reduc_related_stmt->reduc_minmax_epilog_stmt)
+	  {
+	    gimple_stmt_iterator gsi = gsi_for_stmt (stmt_info->reduc_related_stmt->reduc_minmax_epilog_stmt);
+	    gimple *pre_stmt = NULL;
+	    gimple *mm_i_ep_stmt = stmt_info->reduc_related_stmt->reduc_minmax_epilog_stmt;
+	    tree mm_i_lhs = gimple_get_lhs (mm_i_ep_stmt);
+	    tree mm_i_rhs = gimple_call_arg (mm_i_ep_stmt, 0);
+	    tree vectype = TREE_TYPE (mm_i_rhs);
+	    /* Scalar result of min/max in minmax reduction.  */
+	    tree maxval = gimple_get_lhs (stmt_info->reduc_minmax_scalar_result);
+	    tree maxvec = build_vector_from_val (vectype, maxval);
+	    gimple* maxvec_decl = gimple_build_assign (make_ssa_name (vectype),
+						       maxvec);
+	    gsi_insert_before (&gsi, maxvec_decl, GSI_SAME_STMT);
+	    tree maxvec_lhs = gimple_get_lhs (maxvec_decl);
+	    enum tree_code cond_code = EQ_EXPR;
+	    tree vec_comp_type = build_same_sized_truth_vector_type (vectype);
+	    /* Produce a mask that gives the position of max element in max
+	       vector.  */
+	    tree mask = build2 (cond_code, vec_comp_type, maxvec_lhs,
+				maxresultvec);
+	    tree _mask_ssa_name = make_ssa_name (vec_comp_type);
+	    gimple* _mask_decl = gimple_build_assign (_mask_ssa_name,
+						     mask);
+	    gsi_insert_before (&gsi, _mask_decl, GSI_SAME_STMT);
+	    tree mask_ssa_name = make_ssa_name (vectype);
+	    gimple* mask_decl = gimple_build_assign (mask_ssa_name, CONVERT_EXPR, _mask_ssa_name);
+	    gsi_insert_before (&gsi, mask_decl, GSI_SAME_STMT);
+	    /* Apply the mask to the index vector to select index value in
+	       corresponding position.  */
+	    tree new_ssa = make_ssa_name (vectype);
+	    pre_stmt = gimple_build_assign (new_ssa, BIT_AND_EXPR, mask_ssa_name, mm_i_rhs);
+	    gsi_insert_before (&gsi, pre_stmt, GSI_SAME_STMT);
+	    /* Reduce the index vector to a scalar result.  */
+	    gimple* new_stmt = gimple_build_call_internal (IFN_REDUC_MAX,
+							  1,
+							  gimple_get_lhs (pre_stmt));
+	    gimple_set_lhs (new_stmt, mm_i_lhs);
+	    gsi = gsi_for_stmt (mm_i_ep_stmt);
+	    gsi_replace (&gsi, new_stmt, 1);
+	  }
+
 	  scalar_results.safe_push (new_temp);
         }
       else
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index e9575a184ad02787cbdc6ea9059ef1dc35fbca94..8ff6266ecf08213a0f19191c4091bf10b4d6ae37 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -85,7 +85,17 @@ enum vect_reduction_type {
 	 res = res OP val[i];
 
      (with no reassocation).  */
-  FOLD_LEFT_REDUCTION
+  FOLD_LEFT_REDUCTION,
+
+  /* Reduction of an if selecting both the maximum/minimum element and the
+     corresponding index.
+
+      for (int i = 0; i < n; i++) {
+	if (data[i] > best) {
+		best = data[i];
+		best_i = i;
+	}  */
+  INDEX_MINMAX_REDUCTION
 };
 
 #define VECTORIZABLE_CYCLE_DEF(D) (((D) == vect_reduction_def)           \
@@ -967,6 +977,15 @@ public:
         pattern).  */
   stmt_vec_info related_stmt;
 
+
+  stmt_vec_info reduc_related_stmt;
+
+  gimple* reduc_minmax_epilog_stmt;
+
+  gimple* reduc_minmax_scalar_result;
+
+  stmt_vec_info reduc_phi_node;
+
   /* Used to keep a sequence of def stmts of a pattern stmt if such exists.
      The sequence is attached to the original statement rather than the
      pattern statement.  */

Reply via email to