Hi:
  Details described in PR.
  Bootstrapped and regtest on
x86_64-linux-gnu{-m32,}/x86_64-linux-gnu{-m32\
-march=cascadelake,-march=cascadelake}
  Ok for trunk?

  gcc/ChangeLog:

        PR tree-optimization/pr98365
        * tree-if-conv.c (strip_nop_cond_scalar_reduction): New function.
        (is_cond_scalar_reduction): Handle nop_expr in cond scalar reduction.
        (convert_scalar_cond_reduction): Ditto.
        (predicate_scalar_phi): Ditto.

gcc/testsuite/ChangeLog:

        PR tree-optimization/pr98365
        * gcc.target/i386/pr98365.c: New test.

-- 
BR,
Hongtao
From 0248efe310454a31e4bbeb1430f907ec663c39ae Mon Sep 17 00:00:00 2001
From: liuhongt <hongtao....@intel.com>
Date: Wed, 6 Jan 2021 16:33:27 +0800
Subject: [PATCH] Extend is_cond_scalar_reduction to handle nop_expr
 after/before scalar reduction.[PR98365]

gcc/ChangeLog:

	PR tree-optimization/pr98365
	* tree-if-conv.c (strip_nop_cond_scalar_reduction): New function.
	(is_cond_scalar_reduction): Handle nop_expr in cond scalar reduction.
	(convert_scalar_cond_reduction): Ditto.
	(predicate_scalar_phi): Ditto.

gcc/testsuite/ChangeLog:

	PR tree-optimization/pr98365
	* gcc.target/i386/pr98365.c: New test.
---
 gcc/testsuite/gcc.target/i386/pr98365.c |  22 ++++
 gcc/tree-if-conv.c                      | 131 +++++++++++++++++++++---
 2 files changed, 141 insertions(+), 12 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr98365.c

diff --git a/gcc/testsuite/gcc.target/i386/pr98365.c b/gcc/testsuite/gcc.target/i386/pr98365.c
new file mode 100644
index 00000000000..652210dcdd5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr98365.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx2 -ftree-vectorize -fdump-tree-vect-details" } */
+/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
+short foo1 (short* a, short* c, int n)
+{
+  int i;
+  short cnt=0;
+  for (int i = 0;i != n; i++)
+    if (a[i] == c[i])
+      cnt++;
+  return cnt;
+}
+
+char foo2 (char* a, char* c, int n)
+{
+  int i;
+  char cnt=0;
+  for (int i = 0;i != n; i++)
+    if (a[i] == c[i])
+      cnt++;
+  return cnt;
+}
diff --git a/gcc/tree-if-conv.c b/gcc/tree-if-conv.c
index 716eae44a21..5811eb00478 100644
--- a/gcc/tree-if-conv.c
+++ b/gcc/tree-if-conv.c
@@ -1579,6 +1579,31 @@ if_convertible_loop_p (class loop *loop)
   return res;
 }
 
+/* Return reduc_1 if has_nop.
+
+   if (...)
+     tmp1 = (unsigned type) reduc_1;
+     tmp2 = tmp1 + rhs2;
+     reduc_3 = (signed type) tmp2.  */
+static tree
+strip_nop_cond_scalar_reduction (bool has_nop, tree op)
+{
+  if (!has_nop)
+    return op;
+
+  if (TREE_CODE (op) != SSA_NAME)
+    return NULL_TREE;
+
+  gimple* stmt = SSA_NAME_DEF_STMT (op);
+  if (!stmt
+      || gimple_code (stmt) != GIMPLE_ASSIGN
+      || gimple_has_volatile_ops (stmt)
+      || gimple_assign_rhs_code (stmt) != NOP_EXPR)
+    return NULL_TREE;
+
+  return gimple_assign_rhs1 (stmt);
+}
+
 /* Returns true if def-stmt for phi argument ARG is simple increment/decrement
    which is in predicated basic block.
    In fact, the following PHI pattern is searching:
@@ -1595,9 +1620,10 @@ if_convertible_loop_p (class loop *loop)
 
 static bool
 is_cond_scalar_reduction (gimple *phi, gimple **reduc, tree arg_0, tree arg_1,
-			  tree *op0, tree *op1, bool extended)
+			  tree *op0, tree *op1, bool extended, bool* has_nop,
+			  gimple **nop_reduc)
 {
-  tree lhs, r_op1, r_op2;
+  tree lhs, r_op1, r_op2, r_nop1, r_nop2;
   gimple *stmt;
   gimple *header_phi = NULL;
   enum tree_code reduction_op;
@@ -1608,7 +1634,7 @@ is_cond_scalar_reduction (gimple *phi, gimple **reduc, tree arg_0, tree arg_1,
   use_operand_p use_p;
   edge e;
   edge_iterator ei;
-  bool result = false;
+  bool result = *has_nop = false;
   if (TREE_CODE (arg_0) != SSA_NAME || TREE_CODE (arg_1) != SSA_NAME)
     return false;
 
@@ -1656,18 +1682,78 @@ is_cond_scalar_reduction (gimple *phi, gimple **reduc, tree arg_0, tree arg_1,
     return false;
 
   reduction_op = gimple_assign_rhs_code (stmt);
+
+    /* Catch something like below
+
+     loop-header:
+     reduc_1 = PHI <..., reduc_2>
+     ...
+     if (...)
+     tmp1 = (unsigned type) reduc_1;
+     tmp2 = tmp1 + rhs2;
+     reduc_3 = (signed type) tmp2;
+
+     reduc_2 = PHI <reduc_1, reduc_3>
+
+     and convert to
+
+     reduc_2 = PHI <0, reduc_3>
+     tmp1 = (unsigned type)reduce_1;
+     ifcvt = cond_expr ? rhs2 : 0
+     tmp2 = tmp1 +/- ifcvt;
+     reduce_1 = (signed type)tmp2;  */
+
+  if (reduction_op == NOP_EXPR)
+    {
+      lhs = gimple_assign_rhs1 (stmt);
+      if (TREE_CODE (lhs) != SSA_NAME
+	  || !has_single_use (lhs))
+	return false;
+
+      *nop_reduc = stmt;
+      stmt = SSA_NAME_DEF_STMT (lhs);
+      if (gimple_bb (stmt) != gimple_bb (*nop_reduc)
+	  || gimple_code (stmt) != GIMPLE_ASSIGN
+	  || gimple_has_volatile_ops (stmt))
+	return false;
+
+      *has_nop = true;
+      reduction_op = gimple_assign_rhs_code (stmt);
+    }
+
   if (reduction_op != PLUS_EXPR && reduction_op != MINUS_EXPR)
     return false;
   r_op1 = gimple_assign_rhs1 (stmt);
   r_op2 = gimple_assign_rhs2 (stmt);
 
+  r_nop1 = strip_nop_cond_scalar_reduction (*has_nop, r_op1);
+  r_nop2 = strip_nop_cond_scalar_reduction (*has_nop, r_op2);
+
   /* Make R_OP1 to hold reduction variable.  */
-  if (r_op2 == PHI_RESULT (header_phi)
+  if (r_nop2 == PHI_RESULT (header_phi)
       && reduction_op == PLUS_EXPR)
-    std::swap (r_op1, r_op2);
-  else if (r_op1 != PHI_RESULT (header_phi))
+    {
+      std::swap (r_op1, r_op2);
+      std::swap (r_nop1, r_nop2);
+    }
+  else if (r_nop1 != PHI_RESULT (header_phi))
     return false;
 
+  if (*has_nop)
+    {
+      /* Check that R_NOP1 is used in nop_stmt or in PHI only.  */
+      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_nop1)
+	{
+	  gimple *use_stmt = USE_STMT (use_p);
+	  if (is_gimple_debug (use_stmt))
+	    continue;
+	  if (use_stmt == SSA_NAME_DEF_STMT (r_op1))
+	    continue;
+	  if (gimple_code (use_stmt) != GIMPLE_PHI)
+	    return false;
+	}
+    }
+
   /* Check that R_OP1 is used in reduction stmt or in PHI only.  */
   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_op1)
     {
@@ -1705,7 +1791,8 @@ is_cond_scalar_reduction (gimple *phi, gimple **reduc, tree arg_0, tree arg_1,
 
 static tree
 convert_scalar_cond_reduction (gimple *reduc, gimple_stmt_iterator *gsi,
-			       tree cond, tree op0, tree op1, bool swap)
+			       tree cond, tree op0, tree op1, bool swap,
+			       bool has_nop, gimple* nop_reduc)
 {
   gimple_stmt_iterator stmt_it;
   gimple *new_assign;
@@ -1735,6 +1822,23 @@ convert_scalar_cond_reduction (gimple *reduc, gimple_stmt_iterator *gsi,
   rhs = fold_build2 (gimple_assign_rhs_code (reduc),
 		     TREE_TYPE (rhs1), op0, tmp);
 
+  if (has_nop)
+    {
+      /* Create assignment nop_rhs = op0 +/- _ifc_.  */
+      tree nop_rhs = make_temp_ssa_name (TREE_TYPE (rhs1), NULL, "_nop_");
+      gimple* new_assign2 = gimple_build_assign (nop_rhs, rhs);
+      gsi_insert_before (gsi, new_assign2, GSI_SAME_STMT);
+      /* Rebuild rhs for nop_expr.  */
+      rhs = fold_build1 (NOP_EXPR,
+			 TREE_TYPE (gimple_assign_lhs (nop_reduc)),
+			 nop_rhs);
+
+      /* Delete nop_reduc.  */
+      stmt_it = gsi_for_stmt (nop_reduc);
+      gsi_remove (&stmt_it, true);
+      release_defs (nop_reduc);
+    }
+
   /* Delete original reduction stmt.  */
   stmt_it = gsi_for_stmt (reduc);
   gsi_remove (&stmt_it, true);
@@ -1808,7 +1912,7 @@ ifcvt_follow_ssa_use_edges (tree val)
 static void
 predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi)
 {
-  gimple *new_stmt = NULL, *reduc;
+  gimple *new_stmt = NULL, *reduc, *nop_reduc;
   tree rhs, res, arg0, arg1, op0, op1, scev;
   tree cond;
   unsigned int index0;
@@ -1816,6 +1920,7 @@ predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi)
   edge e;
   basic_block bb;
   unsigned int i;
+  bool has_nop;
 
   res = gimple_phi_result (phi);
   if (virtual_operand_p (res))
@@ -1876,10 +1981,12 @@ predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi)
 	  arg1 = gimple_phi_arg_def (phi, 1);
 	}
       if (is_cond_scalar_reduction (phi, &reduc, arg0, arg1,
-				    &op0, &op1, false))
+				    &op0, &op1, false, &has_nop,
+				    &nop_reduc))
 	/* Convert reduction stmt into vectorizable form.  */
 	rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
-					     true_bb != gimple_bb (reduc));
+					     true_bb != gimple_bb (reduc),
+					     has_nop, nop_reduc);
       else
 	/* Build new RHS using selected condition and arguments.  */
 	rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
@@ -1961,14 +2068,14 @@ predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi)
 					 is_gimple_condexpr, NULL_TREE,
 					 true, GSI_SAME_STMT);
       if (!(is_cond_scalar_reduction (phi, &reduc, arg0 , arg1,
-				      &op0, &op1, true)))
+				      &op0, &op1, true, &has_nop, &nop_reduc)))
 	rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
 				    swap? arg1 : arg0,
 				    swap? arg0 : arg1);
       else
 	/* Convert reduction stmt into vectorizable form.  */
 	rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
-					     swap);
+					     swap,has_nop, nop_reduc);
       new_stmt = gimple_build_assign (res, rhs);
       gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
       update_stmt (new_stmt);
-- 
2.18.1

Reply via email to