Hi,

This patch enhances the mixed size condition pattern detection to work
with non-constant integral 'then' and 'else' clauses. It checks that
'then' and 'else' are results of a type conversion from the comparison
type to their current type, and generates the whole cond_expr in the
comparison type, ignoring the conversions.
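
For example, the pattern now recognizes statements of the following
shape, where the comparison is done in short and the result is int (a
sketch of what the new slp-cond-4.c test exercises; names are
illustrative):

  a_i = (int) a_s;
  b_i = (int) b_s;
  r_i = x_s >= y_s ? a_i : b_i;

and rewrites them to perform the COND_EXPR in the comparison type, with
a single conversion of the result:

  tmp_s = x_s >= y_s ? a_s : b_s;
  r_i = (int) tmp_s;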

Bootstrapped on powerpc64-suse-linux, tested on powerpc64-suse-linux and
x86_64-suse-linux.

Ira

ChangeLog:

        * tree-vect-patterns.c (widened_name_p): Rename to ...
        (type_conversion_p): ... this.  Add new argument to determine
        if it's a promotion or demotion operation.  Check for
        CONVERT_EXPR_CODE_P instead of NOP_EXPR.
        (vect_recog_dot_prod_pattern): Call type_conversion_p instead
        widened_name_p.
        (vect_recog_widen_mult_pattern, vect_recog_widen_sum_pattern,
        vect_operation_fits_smaller_type, vect_recog_widen_shift_pattern):
        Likewise.
        (vect_recog_mixed_size_cond_pattern): Likewise.  Allow
        non-constant then and else clauses.

testsuite/ChangeLog:

        * gcc.dg/vect/slp-cond-3.c: New test.
        * gcc.dg/vect/slp-cond-4.c: New test.

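As a quick reference for the new promotion/demotion result of
type_conversion_p: a conversion is classified as a promotion when the
destination type is at least twice as wide as the source type, as a
demotion in the symmetric case, and is rejected otherwise.
Illustrative (hypothetical) examples, not part of the patch:

  int      i = (int) c;         /* char -> int: promotion (32 >= 2*8).  */
  short    s = (short) i;       /* int -> short: demotion (32 >= 2*16). */
  unsigned u = (unsigned) i;    /* int -> unsigned int: neither one,
                                   so type_conversion_p returns false.  */
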
(See attached file: mixed-cond.txt)
Index: testsuite/gcc.dg/vect/slp-cond-3.c
===================================================================
--- testsuite/gcc.dg/vect/slp-cond-3.c  (revision 0)
+++ testsuite/gcc.dg/vect/slp-cond-3.c  (revision 0)
@@ -0,0 +1,84 @@
+/* { dg-require-effective-target vect_condition } */
+
+#include "tree-vect.h"
+
+#define N 128
+
+/* Comparison in int, then/else and result in unsigned char.  */
+
+static inline unsigned char
+foo (int x, int y, int a, int b)
+{
+  if (x >= y)
+    return a;
+  else
+    return b;
+}
+
+__attribute__((noinline, noclone)) void
+bar (unsigned char * __restrict__ a, unsigned char * __restrict__ b,
+     unsigned char * __restrict__ c, unsigned char * __restrict__ d,
+     unsigned char * __restrict__ e, int w)
+{
+  int i;
+  for (i = 0; i < N/16; i++, a += 16, b += 16, c += 16, d += 16, e += 16)
+    {
+      e[0] = foo (c[0], d[0], a[0] * w, b[0] * w);
+      e[1] = foo (c[1], d[1], a[1] * w, b[1] * w);
+      e[2] = foo (c[2], d[2], a[2] * w, b[2] * w);
+      e[3] = foo (c[3], d[3], a[3] * w, b[3] * w);
+      e[4] = foo (c[4], d[4], a[4] * w, b[4] * w);
+      e[5] = foo (c[5], d[5], a[5] * w, b[5] * w);
+      e[6] = foo (c[6], d[6], a[6] * w, b[6] * w);
+      e[7] = foo (c[7], d[7], a[7] * w, b[7] * w);
+      e[8] = foo (c[8], d[8], a[8] * w, b[8] * w);
+      e[9] = foo (c[9], d[9], a[9] * w, b[9] * w);
+      e[10] = foo (c[10], d[10], a[10] * w, b[10] * w);
+      e[11] = foo (c[11], d[11], a[11] * w, b[11] * w);
+      e[12] = foo (c[12], d[12], a[12] * w, b[12] * w);
+      e[13] = foo (c[13], d[13], a[13] * w, b[13] * w);
+      e[14] = foo (c[14], d[14], a[14] * w, b[14] * w);
+      e[15] = foo (c[15], d[15], a[15] * w, b[15] * w);
+    }
+}
+
+
+unsigned char a[N], b[N], c[N], d[N], e[N];
+
+int main ()
+{
+  int i;
+
+  check_vect ();
+
+  for (i = 0; i < N; i++)
+    {
+      a[i] = i;
+      b[i] = 5;
+      e[i] = 0;
+
+      switch (i % 9)
+        {
+        case 0: asm (""); c[i] = i; d[i] = i + 1; break;
+        case 1: c[i] = 0; d[i] = 0; break;
+        case 2: c[i] = i + 1; d[i] = i - 1; break;
+        case 3: c[i] = i; d[i] = i + 7; break;
+        case 4: c[i] = i; d[i] = i; break;
+        case 5: c[i] = i + 16; d[i] = i + 3; break;
+        case 6: c[i] = i - 5; d[i] = i; break;
+        case 7: c[i] = i; d[i] = i; break;
+        case 8: c[i] = i; d[i] = i - 7; break;
+        }
+    }
+
+  bar (a, b, c, d, e, 2);
+  for (i = 0; i < N; i++)
+    if (e[i] != ((i % 3) == 0 ? 10 : 2 * i))
+      abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
Index: testsuite/gcc.dg/vect/slp-cond-4.c
===================================================================
--- testsuite/gcc.dg/vect/slp-cond-4.c  (revision 0)
+++ testsuite/gcc.dg/vect/slp-cond-4.c  (revision 0)
@@ -0,0 +1,86 @@
+/* { dg-require-effective-target vect_condition } */
+
+#include "tree-vect.h"
+
+#define N 128
+
+/* Comparison in short, then/else and result in int.  */
+static inline int
+foo (short x, short y, int a, int b)
+{
+  if (x >= y)
+    return a;
+  else
+    return b;
+}
+
+__attribute__((noinline, noclone)) void
+bar (short * __restrict__ a, short * __restrict__ b,
+     short * __restrict__ c, short * __restrict__ d,
+     int * __restrict__ e, int w)
+{
+  int i;
+  int stride = 16;
+
+  for (i = 0; i < N/stride; i++, a += stride, b += stride, c += stride,
+                                d += stride, e += stride)
+    {
+      e[0] = foo (c[0], d[0], a[0], b[0]);
+      e[1] = foo (c[1], d[1], a[1], b[1]);
+      e[2] = foo (c[2], d[2], a[2], b[2]);
+      e[3] = foo (c[3], d[3], a[3], b[3]);
+      e[4] = foo (c[4], d[4], a[4], b[4]);
+      e[5] = foo (c[5], d[5], a[5], b[5]);
+      e[6] = foo (c[6], d[6], a[6], b[6]);
+      e[7] = foo (c[7], d[7], a[7], b[7]);
+      e[8] = foo (c[8], d[8], a[8], b[8]);
+      e[9] = foo (c[9], d[9], a[9], b[9]);
+      e[10] = foo (c[10], d[10], a[10], b[10]);
+      e[11] = foo (c[11], d[11], a[11], b[11]);
+      e[12] = foo (c[12], d[12], a[12], b[12]);
+      e[13] = foo (c[13], d[13], a[13], b[13]);
+      e[14] = foo (c[14], d[14], a[14], b[14]);
+      e[15] = foo (c[15], d[15], a[15], b[15]);
+    }
+}
+
+
+short a[N], b[N], c[N], d[N];
+int e[N];
+
+int main ()
+{
+  int i;
+
+  check_vect ();
+
+  for (i = 0; i < N; i++)
+    {
+      a[i] = i;
+      b[i] = 5;
+      e[i] = 0;
+
+      switch (i % 9)
+        {
+        case 0: asm (""); c[i] = - i - 1; d[i] = i + 1; break;
+        case 1: c[i] = 0; d[i] = 0; break;
+        case 2: c[i] = i + 1; d[i] = - i - 1; break;
+        case 3: c[i] = i; d[i] = i + 7; break;
+        case 4: c[i] = i; d[i] = i; break;
+        case 5: c[i] = i + 16; d[i] = i + 3; break;
+        case 6: c[i] = - i - 5; d[i] = - i; break;
+        case 7: c[i] = - i; d[i] = - i; break;
+        case 8: c[i] = - i; d[i] = - i - 7; break;
+        }
+    }
+
+  bar (a, b, c, d, e, 2);
+  for (i = 0; i < N; i++)
+    if (e[i] != ((i % 3) == 0 ? 5 : i))
+      abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
Index: tree-vect-patterns.c
===================================================================
--- tree-vect-patterns.c        (revision 183902)
+++ tree-vect-patterns.c        (working copy)
@@ -84,18 +84,16 @@ new_pattern_def_seq (stmt_vec_info stmt_info, gimp
   append_pattern_def_seq (stmt_info, stmt);
 }
 
-/* Function widened_name_p
-
-   Check whether NAME, an ssa-name used in USE_STMT,
-   is a result of a type-promotion, such that:
+/* Check whether NAME, an ssa-name used in USE_STMT,
+   is a result of a type promotion or demotion, such that:
      DEF_STMT: NAME = NOP (name0)
-   where the type of name0 (HALF_TYPE) is smaller than the type of NAME.
+   where the type of name0 (ORIG_TYPE) is smaller/bigger than the type of NAME.
    If CHECK_SIGN is TRUE, check that either both types are signed or both are
    unsigned.  */
 
 static bool
-widened_name_p (tree name, gimple use_stmt, tree *half_type, gimple *def_stmt,
-               bool check_sign)
+type_conversion_p (tree name, gimple use_stmt, bool check_sign,
+                  tree *orig_type, gimple *def_stmt, bool *promotion)
 {
   tree dummy;
   gimple dummy_gimple;
@@ -117,23 +115,29 @@ static bool
       && dt != vect_external_def && dt != vect_constant_def)
     return false;
 
-  if (! *def_stmt)
+  if (!*def_stmt)
     return false;
 
   if (!is_gimple_assign (*def_stmt))
     return false;
 
-  if (gimple_assign_rhs_code (*def_stmt) != NOP_EXPR)
+  if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
     return false;
 
   oprnd0 = gimple_assign_rhs1 (*def_stmt);
 
-  *half_type = TREE_TYPE (oprnd0);
-  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*half_type)
-      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*half_type)) && check_sign)
-      || (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 2)))
+  *orig_type = TREE_TYPE (oprnd0);
+  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
+      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
     return false;
 
+  if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
+    *promotion = true;
+  else if (TYPE_PRECISION (*orig_type) >= (TYPE_PRECISION (type) * 2))
+    *promotion = false;
+  else
+    return false;
+
   if (!vect_is_simple_use (oprnd0, *def_stmt, loop_vinfo,
                           NULL, &dummy_gimple, &dummy, &dt))
     return false;
@@ -213,6 +217,7 @@ vect_recog_dot_prod_pattern (VEC (gimple, heap) **
   loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
   struct loop *loop = LOOP_VINFO_LOOP (loop_info);
   tree var;
+  bool promotion;
 
   if (!is_gimple_assign (last_stmt))
     return NULL;
@@ -271,7 +276,9 @@ vect_recog_dot_prod_pattern (VEC (gimple, heap) **
         return NULL;
       stmt = last_stmt;
 
-      if (widened_name_p (oprnd0, stmt, &half_type, &def_stmt, true))
+      if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
+                               &promotion)
+         && promotion)
         {
           stmt = def_stmt;
           oprnd0 = gimple_assign_rhs1 (stmt);
@@ -328,10 +335,14 @@ vect_recog_dot_prod_pattern (VEC (gimple, heap) **
       if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
           || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
         return NULL;
-      if (!widened_name_p (oprnd0, stmt, &half_type0, &def_stmt, true))
+      if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
+                                &promotion)
+          || !promotion)
         return NULL;
       oprnd00 = gimple_assign_rhs1 (def_stmt);
-      if (!widened_name_p (oprnd1, stmt, &half_type1, &def_stmt, true))
+      if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
+                                &promotion)
+          || !promotion)
         return NULL;
       oprnd01 = gimple_assign_rhs1 (def_stmt);
       if (!types_compatible_p (half_type0, half_type1))
@@ -536,6 +547,7 @@ vect_recog_widen_mult_pattern (VEC (gimple, heap)
   int dummy_int;
   VEC (tree, heap) *dummy_vec;
   bool op1_ok;
+  bool promotion;
 
   if (!is_gimple_assign (last_stmt))
     return NULL;
@@ -555,12 +567,15 @@ vect_recog_widen_mult_pattern (VEC (gimple, heap)
     return NULL;
 
   /* Check argument 0.  */
-  if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false))
-    return NULL;
+  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
+                         &promotion)
+      || !promotion)
+    return NULL;
   /* Check argument 1.  */
-  op1_ok = widened_name_p (oprnd1, last_stmt, &half_type1, &def_stmt1, false);
+  op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
+                              &def_stmt1, &promotion);
 
-  if (op1_ok)
+  if (op1_ok && promotion)
     {
       oprnd0 = gimple_assign_rhs1 (def_stmt0);
       oprnd1 = gimple_assign_rhs1 (def_stmt1);
@@ -800,6 +815,7 @@ vect_recog_widen_sum_pattern (VEC (gimple, heap) *
   loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
   struct loop *loop = LOOP_VINFO_LOOP (loop_info);
   tree var;
+  bool promotion;
 
   if (!is_gimple_assign (last_stmt))
     return NULL;
@@ -834,8 +850,10 @@ vect_recog_widen_sum_pattern (VEC (gimple, heap) *
      Left to check that oprnd0 is defined by a cast from type 'type' to type
      'TYPE'.  */
 
-  if (!widened_name_p (oprnd0, last_stmt, &half_type, &stmt, true))
-    return NULL;
+  if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
+                          &promotion)
+      || !promotion)
+    return NULL;
 
   oprnd0 = gimple_assign_rhs1 (stmt);
   *type_in = half_type;
@@ -900,6 +918,7 @@ vect_operation_fits_smaller_type (gimple stmt, tre
   bool first = false;
   loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt));
   struct loop *loop = LOOP_VINFO_LOOP (loop_info);
+  bool promotion;
 
   *op0 = NULL_TREE;
   *op1 = NULL_TREE;
@@ -931,7 +950,9 @@ vect_operation_fits_smaller_type (gimple stmt, tre
   else
     {
       first = true;
-      if (!widened_name_p (oprnd, stmt, &half_type, &def_stmt, false)
+      if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
+                               &promotion)
+         || !promotion
           || !gimple_bb (def_stmt)
           || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
           || !vinfo_for_stmt (def_stmt))
@@ -1313,6 +1334,7 @@ vect_recog_widen_shift_pattern (VEC (gimple, heap)
   VEC (tree, heap) * dummy_vec;
   gimple use_stmt = NULL;
   bool over_widen = false;
+  bool promotion;
 
   if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
     return NULL;
@@ -1367,8 +1389,10 @@ vect_recog_widen_shift_pattern (VEC (gimple, heap)
     return NULL;
 
   /* Check operand 0: it has to be defined by a type promotion.  */
-  if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false))
-    return NULL;
+  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
+                          &promotion)
+      || !promotion)
+    return NULL;
 
   /* Check operand 1: has to be positive.  We check that it fits the type
      in vect_handle_widen_op_by_const ().  */
@@ -1809,9 +1833,9 @@ vect_recog_sdivmod_pow2_pattern (VEC (gimple, heap
      S1  a_T = x_t CMP y_t ? b_T : c_T;
 
    where type 'TYPE' is an integral type which has different size
-   from 'type'.  b_T and c_T are constants and if 'TYPE' is wider
+   from 'type'.  b_T and c_T are either constants (and if 'TYPE' is wider
    than 'type', the constants need to fit into an integer type
-   with the same width as 'type'.
+   with the same width as 'type') or results of conversion from 'type'.
 
    Input:
 
@@ -1836,10 +1860,14 @@ vect_recog_mixed_size_cond_pattern (VEC (gimple, h
   gimple last_stmt = VEC_index (gimple, *stmts, 0);
   tree cond_expr, then_clause, else_clause;
   stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
-  tree type, vectype, comp_vectype, itype, vecitype;
+  tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
   enum machine_mode cmpmode;
   gimple pattern_stmt, def_stmt;
   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
+  tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
+  gimple def_stmt0 = NULL, def_stmt1 = NULL;
+  bool promotion;
+  tree comp_scalar_type;
 
   if (!is_gimple_assign (last_stmt)
       || gimple_assign_rhs_code (last_stmt) != COND_EXPR
@@ -1850,19 +1878,50 @@ vect_recog_mixed_size_cond_pattern (VEC (gimple, h
   then_clause = gimple_assign_rhs2 (last_stmt);
   else_clause = gimple_assign_rhs3 (last_stmt);
 
-  if (TREE_CODE (then_clause) != INTEGER_CST
-      || TREE_CODE (else_clause) != INTEGER_CST)
-    return NULL;
-
   if (!COMPARISON_CLASS_P (cond_expr))
     return NULL;
 
-  comp_vectype
-    = get_vectype_for_scalar_type (TREE_TYPE (TREE_OPERAND (cond_expr, 0)));
+  comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
+  comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
   if (comp_vectype == NULL_TREE)
     return NULL;
 
   type = gimple_expr_type (last_stmt);
+  if (types_compatible_p (type, comp_scalar_type)
+      || ((TREE_CODE (then_clause) != INTEGER_CST
+          || TREE_CODE (else_clause) != INTEGER_CST)
+         && !INTEGRAL_TYPE_P (comp_scalar_type))
+      || !INTEGRAL_TYPE_P (type))
+    return NULL;
+
+  if ((TREE_CODE (then_clause) != INTEGER_CST
+       && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
+                              &def_stmt0, &promotion))
+      || (TREE_CODE (else_clause) != INTEGER_CST
+          && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
+                                 &def_stmt1, &promotion)))
+    return NULL;
+
+  if (orig_type0 && orig_type1
+      && !types_compatible_p (orig_type0, orig_type1))
+    return NULL;
+
+  if (orig_type0)
+    {
+      if (!types_compatible_p (orig_type0, comp_scalar_type))
+       return NULL;
+      then_clause = gimple_assign_rhs1 (def_stmt0);
+      itype = orig_type0;
+    }
+
+  if (orig_type1)
+    {
+      if (!types_compatible_p (orig_type1, comp_scalar_type))
+       return NULL;
+      else_clause = gimple_assign_rhs1 (def_stmt1);
+      itype = orig_type1;
+    }
+
   cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype));
 
   if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode))
@@ -1875,8 +1934,10 @@ vect_recog_mixed_size_cond_pattern (VEC (gimple, h
   if (expand_vec_cond_expr_p (vectype, comp_vectype))
     return NULL;
 
-  itype = build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode),
-                                         TYPE_UNSIGNED (type));
+  if (itype == NULL_TREE)
+    itype = build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode),
+                                           TYPE_UNSIGNED (type));
+
   if (itype == NULL_TREE
       || GET_MODE_BITSIZE (TYPE_MODE (itype)) != GET_MODE_BITSIZE (cmpmode))
     return NULL;
@@ -1890,8 +1951,10 @@ vect_recog_mixed_size_cond_pattern (VEC (gimple, h
 
   if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode))
     {
-      if (!int_fits_type_p (then_clause, itype)
-         || !int_fits_type_p (else_clause, itype))
+      if ((TREE_CODE (then_clause) == INTEGER_CST
+          && !int_fits_type_p (then_clause, itype))
+         || (TREE_CODE (else_clause) == INTEGER_CST
+             && !int_fits_type_p (else_clause, itype)))
        return NULL;
     }
 
