Fix PR48052: loop not vectorized if index is "unsigned int"

Abderrazek Zaafrani Mon, 04 May 2015 12:48:52 -0700

This is an old thread, but we are still running into similar issues:
code is not being vectorized on 64-bit targets because scev is not
able to analyze the overflow condition precisely enough.


While the original test case shown here seems to work now, it does not
work if the start value is not a constant and the loop index variable
is of unsigned type. For example:

/* Store the element-wise product c[i] * x_in[i] into x_out[i] for every
   index i starting at START and stopping when i equals N.  The index is
   an "unsigned int", the start value is not a constant, and the exit
   test is "!=".  */
void
loop2 (double const *__restrict__ x_in, double *__restrict__ x_out,
       double const *__restrict__ c, unsigned int N, unsigned int start)
{
  unsigned int i = start;

  while (i != N)
    {
      x_out[i] = c[i] * x_in[i];
      ++i;
    }
}

Here is our unit test:

/* Return the sum of A[k] * B[k] for k in [start, start + BS).
   BS is the block size (called B in the surrounding text); it gets its
   own name here so that it does not clash with the array parameter B.
   The index k is unsigned and the start value is not a constant.  */
int foo(int *A, int *B, unsigned start, unsigned BS)
{
  /* Start the reduction from zero so the result is well defined.  */
  int s = 0;
  for (unsigned k = start; k < start + BS; k++)
    s += A[k] * B[k];
  return s;
}

Our unit test case is extracted from a matrix multiply of a
two-dimensional array in which all loops are blocked by hand by a
factor of B. Although slightly modified, the loop above corresponds to
the innermost loop of the blocked matrix multiply.

We worked on a patch to solve the problem (see attachment).
The attached patch passed bootstrap and make check on x86_64-linux.
Ok for trunk?

Thanks,
Abderrazek Zaafrani

From eedbcd1ef6a81bb9c000e0dba9ff2a6c524576ac Mon Sep 17 00:00:00 2001
From: Abderrazek Zaafrani <a.zaafr...@samsung.com>
Date: Mon, 4 May 2015 11:00:12 -0500
Subject: [PATCH] scev for vectorization

        PR optimization/48052
        * tree-ssa-loop-niter.c (variable_appears_in_loop_exit_condition): New.
        (scev_probably_wraps_p): Handle unsigned convert expressions to a
        larger type than the basic induction variable.

        * gcc.dg/vect/pr48052.c: New.
---
 gcc/testsuite/gcc.dg/vect/pr48052.c | 27 ++++++++++++
 gcc/tree-ssa-loop-niter.c           | 84 +++++++++++++++++++++++++++++++++++++
 2 files changed, 111 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr48052.c

diff --git a/gcc/testsuite/gcc.dg/vect/pr48052.c b/gcc/testsuite/gcc.dg/vect/pr48052.c
new file mode 100644
index 0000000..8e406d7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr48052.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3" } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
+int foo(int* A, int* B,  unsigned start, unsigned BS)
+{
+  int s;
+  for (unsigned k = start;  k < start + BS; k++)
+    {
+      s += A[k] * B[k];
+    }
+
+  return s;
+}
+
+int bar(int* A, int* B, unsigned BS)
+{
+  int s;
+  for (unsigned k = 0;  k < BS; k++)
+    {
+      s += A[k] * B[k];
+    }
+
+  return s;
+}
+
diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
index 042f8df..345fb93 100644
--- a/gcc/tree-ssa-loop-niter.c
+++ b/gcc/tree-ssa-loop-niter.c
@@ -3773,6 +3773,30 @@ nowrap_type_p (tree type)
   return false;
 }
 
+/* Returns true when T appears in the exit condition of LOOP.  */
+
+static bool
+variable_appears_in_loop_exit_condition (tree t, struct loop *loop)
+{
+  struct nb_iter_bound *bound;
+
+  /* For now, we are only interested in loops with one exit condition.  */
+  if (loop->bounds == NULL || loop->bounds->next != NULL)
+      return false;
+
+  for (bound = loop->bounds; bound; bound = bound->next)
+    {
+      if (gimple_code (bound->stmt) != GIMPLE_COND)
+        return false;
+
+      if (t == gimple_cond_lhs(bound->stmt)
+         || t == gimple_cond_rhs(bound->stmt))
+        return true;
+    }
+
+  return false;
+}
+
 /* Return false only when the induction variable BASE + STEP * I is
    known to not overflow: i.e. when the number of iterations is small
    enough with respect to the step and initial condition in order to
@@ -3879,6 +3903,66 @@ scev_probably_wraps_p (tree base, tree step,
 
   fold_undefer_and_ignore_overflow_warnings ();
 
+  /* At this point, we could not determine that the current scalar
+     evolution composed of base and step does not overflow.  In order
+     to improve this analysis, go back to the context of this scev,
+     i.e., statement and loop, and determine from there if we can
+     deduce that there is no overflow.
+
+     We are so far interested in convert statement of this form
+
+     _1 = (some cast) I;
+
+     where I is a basic induction variable.  This case is common when
+     computing addresses for 64-bit targets.  */
+  if (loop != NULL && loop->nb_iterations != NULL && loop->bounds != NULL
+      && at_stmt != NULL && integer_onep (step))
+    {
+      enum tree_code nbi_code = TREE_CODE (loop->nb_iterations);
+      enum gimple_code stmt_code = gimple_code (at_stmt);
+
+      if (nbi_code != SCEV_NOT_KNOWN && stmt_code == GIMPLE_ASSIGN)
+        {
+          tree rhs1 = gimple_assign_rhs1 (at_stmt);
+          enum tree_code tree_code = gimple_assign_rhs_code (at_stmt);
+          tree rhs2 = gimple_assign_rhs2 (at_stmt);
+
+          /* If at_stmt is a convert statement: _1 = (some cast) I;  */
+          if (rhs1 != NULL && rhs2 == NULL
+              && (tree_code == CONVERT_EXPR || tree_code == NOP_EXPR))
+            {
+              tree stmt_type = TREE_TYPE (gimple_assign_lhs (at_stmt));
+              int stmt_type_size = tree_to_uhwi (TYPE_SIZE(stmt_type));
+              int rhs1_type_size = tree_to_uhwi (TYPE_SIZE(TREE_TYPE(rhs1)));
+              gimple def_rhs1 = SSA_NAME_DEF_STMT (rhs1);
+
+              if (gimple_code (def_rhs1) == GIMPLE_PHI
+                 && gimple_phi_num_args (def_rhs1) == 2
+                 && stmt_type_size > rhs1_type_size)
+                {
+                  tree n1 = PHI_ARG_DEF (def_rhs1, 0);
+                  tree n2 = PHI_ARG_DEF (def_rhs1, 1);
+
+                 /* Induction variables with a constant initial value
+                    are already handled above.  */
+                  if (TREE_CODE (n1) != INTEGER_CST)
+                    {
+                      gimple n1_stmt = SSA_NAME_DEF_STMT (n1);
+                      if (n1_stmt != NULL
+                          && (n1_stmt->bb == NULL
+                             || n1_stmt->bb->loop_father != loop)
+                          && variable_appears_in_loop_exit_condition (n2, 
loop))
+                       /* There is no overflow on _1 = (some cast) I;
+                          because the cast is to a larger type than
+                          the type of the basic induction variable
+                          "I" and the loop is countable.  */
+                        return false;
+                    }
+                }
+            }
+        }
+    }
+
   /* At this point we still don't have a proof that the iv does not
      overflow: give up.  */
   return true;
-- 
2.1.0.243.g30d45f7

Fix PR48052: loop not vectorized if index is "unsigned int"

Reply via email to