RFA: vectorizer patches 1/2 : WIDEN_MULT_PLUS support

Joern Wolfgang Rennecke Sat, 10 Nov 2018 23:22:34 -0800

Our target (eSi-RISC) doesn't have DOT_PROD_EXPR or WIDEN_SUM_EXPR operations in

the standard vector modes; however, it has a vectorized WIDEN_MULT_PLUS_EXPR

implementation with a double-vector output, which works just as well, with a little

help from the compiler - as implemented in these patches.


Bootstrapped and regtested on i686-pc-linux-gnu.

2018-11-07  Joern Rennecke  <joern.renne...@riscy-ip.com>

        * tree-cfg.c (verify_gimple_assign_ternary):
        Allow vector arguments to WIDEN_MULT_{PLUS,MINUS}_EXPR.
        * tree-vect-data-refs.c (vect_get_smallest_scalar_type):
        Treat WIDEN_MULT_PLUS_EXPR like WIDEN_SUM_EXPR.
        * tree-vect-loop.c (get_initial_def_for_reduction): Likewise.
        Get VECTYPE from STMT_VINFO_VECTYPE.
        (vectorizable_reduction): Fix result vector type for
        WIDEN_MULT_PLUS_EXPR.
        Use optab_handler or convert_optab_handler as needed.
        * tree-vect-patterns.c (vect_supportable_widen_optab_p): New function.
        (vect_recog_dot_prod_pattern): If DOT_PROD_EXPR can't be expanded
        directly, try to use WIDEN_MULT_PLUS_EXPR instead.
        (vect_recog_widen_sum_pattern): If no WIDEN_SUM pattern is available,
        try WIDEN_MULT_PLUS.
        * tree-vect-stmts.c (vect_get_vector_types_for_stmt):
        Allow vector size input/output mismatch for reduction.

Index: gcc/tree-cfg.c
===================================================================
--- gcc/tree-cfg.c      (revision 266008)
+++ gcc/tree-cfg.c      (working copy)
@@ -4162,6 +4162,22 @@ verify_gimple_assign_ternary (gassign *s
     {
     case WIDEN_MULT_PLUS_EXPR:
     case WIDEN_MULT_MINUS_EXPR:
+      if (VECTOR_TYPE_P (lhs_type)
+         && VECTOR_TYPE_P (rhs1_type)
+         && VECTOR_TYPE_P (rhs2_type)
+         && VECTOR_TYPE_P (rhs3_type)
+         && !maybe_ne (TYPE_VECTOR_SUBPARTS (lhs_type),
+                       TYPE_VECTOR_SUBPARTS (rhs1_type))
+         && !maybe_ne (TYPE_VECTOR_SUBPARTS (lhs_type),
+                       TYPE_VECTOR_SUBPARTS (rhs2_type))
+         && !maybe_ne (TYPE_VECTOR_SUBPARTS (lhs_type),
+                       TYPE_VECTOR_SUBPARTS (rhs3_type)))
+       {
+         lhs_type = TREE_TYPE (lhs_type);
+         rhs1_type = TREE_TYPE (rhs1_type);
+         rhs2_type = TREE_TYPE (rhs2_type);
+         rhs3_type = TREE_TYPE (rhs3_type);
+       }
       if ((!INTEGRAL_TYPE_P (rhs1_type)
           && !FIXED_POINT_TYPE_P (rhs1_type))
          || !useless_type_conversion_p (rhs1_type, rhs2_type)
Index: gcc/tree-vect-data-refs.c
===================================================================
--- gcc/tree-vect-data-refs.c   (revision 266008)
+++ gcc/tree-vect-data-refs.c   (working copy)
@@ -135,6 +135,7 @@ vect_get_smallest_scalar_type (stmt_vec_
       && (gimple_assign_cast_p (assign)
          || gimple_assign_rhs_code (assign) == DOT_PROD_EXPR
          || gimple_assign_rhs_code (assign) == WIDEN_SUM_EXPR
+         || gimple_assign_rhs_code (assign) == WIDEN_MULT_PLUS_EXPR
          || gimple_assign_rhs_code (assign) == WIDEN_MULT_EXPR
          || gimple_assign_rhs_code (assign) == WIDEN_LSHIFT_EXPR
          || gimple_assign_rhs_code (assign) == FLOAT_EXPR))
Index: gcc/tree-vect-loop.c
===================================================================
--- gcc/tree-vect-loop.c        (revision 266008)
+++ gcc/tree-vect-loop.c        (working copy)
@@ -3978,7 +3978,7 @@ get_initial_def_for_reduction (stmt_vec_
   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
   tree scalar_type = TREE_TYPE (init_val);
-  tree vectype = get_vectype_for_scalar_type (scalar_type);
+  tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
   enum tree_code code = gimple_assign_rhs_code (stmt_vinfo->stmt);
   tree def_for_init;
   tree init_def;
@@ -4001,6 +4001,7 @@ get_initial_def_for_reduction (stmt_vec_
     {
     case WIDEN_SUM_EXPR:
     case DOT_PROD_EXPR:
+    case WIDEN_MULT_PLUS_EXPR:
     case SAD_EXPR:
     case PLUS_EXPR:
     case MINUS_EXPR:
@@ -6043,6 +6044,21 @@ vectorizable_reduction (stmt_vec_info st
            slp_node_instance->reduc_phis = slp_node;
 
          STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
+
+         stmt_vec_info reduc = STMT_VINFO_REDUC_DEF (stmt_info);
+         if (reduc
+             && (reduc = STMT_VINFO_RELATED_STMT (reduc))
+             && gimple_assign_rhs_code (reduc->stmt) == WIDEN_MULT_PLUS_EXPR)
+           {
+             poly_uint64 nelem
+               = (TYPE_VECTOR_SUBPARTS
+                   (get_vectype_for_scalar_type
+                     (TREE_TYPE (gimple_assign_rhs1 (reduc->stmt)))));
+             STMT_VINFO_VECTYPE (stmt_info)
+               = (build_vector_type
+                   (TREE_TYPE (gimple_assign_lhs (reduc->stmt)), nelem));
+           }
+
          return true;
        }
 
@@ -6529,7 +6545,13 @@ vectorizable_reduction (stmt_vec_info st
           return false;
         }
 
-      if (optab_handler (optab, vec_mode) == CODE_FOR_nothing)
+      enum insn_code icode;
+      if (convert_optab_p (optab))
+       icode
+         = convert_optab_handler (optab, TYPE_MODE (vectype_out), vec_mode);
+      else
+       icode = optab_handler (optab, vec_mode);
+      if (icode == CODE_FOR_nothing)
         {
           if (dump_enabled_p ())
             dump_printf (MSG_NOTE, "op not supported by target.\n");
Index: gcc/tree-vect-patterns.c
===================================================================
--- gcc/tree-vect-patterns.c    (revision 266008)
+++ gcc/tree-vect-patterns.c    (working copy)
@@ -212,6 +212,49 @@ vect_supportable_direct_optab_p (tree ot
   return true;
 }
 
+/* Likewise for widening operations, like WIDEN_MULT_PLUS_EXPR:
+   Return true if the target supports a vector version of CODE,
+   where CODE is known to map to a 'conversion' optab.  ITYPE specifies
+   the type of (some of) the (narrower) scalar inputs and OTYPE specifies
+   the type of the scalar result.
+
+   The number of elements of the input vector is the same as the number
+   of elements of the output vector, and Operand 0 of the target pattern
+   must match the latter.
+
+   When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
+   Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
+   is nonnull.  */
+static bool
+vect_supportable_widen_optab_p (tree otype, tree_code code,
+                                tree itype, tree *vecotype_out,
+                                tree *vecitype_out = NULL)
+{
+  tree vecitype = get_vectype_for_scalar_type (itype);
+  if (!vecitype)
+    return false;
+
+  tree vecotype
+    = build_vector_type (otype, GET_MODE_NUNITS (TYPE_MODE (vecitype)));
+  if (!vecotype)
+    return false;
+
+  optab optab = optab_for_tree_code (code, vecitype, optab_default);
+  if (!optab)
+    return false;
+
+  insn_code icode
+    = convert_optab_handler (optab, TYPE_MODE (vecotype), TYPE_MODE (vecitype));
+  if (icode == CODE_FOR_nothing
+      || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
+    return false;
+
+  *vecotype_out = vecotype;
+  if (vecitype_out)
+    *vecitype_out = vecitype;
+  return true;
+}
+
 /* Round bit precision PRECISION up to a full element.  */
 
 static unsigned int
@@ -953,9 +996,17 @@ vect_recog_dot_prod_pattern (stmt_vec_in
   vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);
 
   tree half_vectype;
+  tree_code code = DOT_PROD_EXPR;
   if (!vect_supportable_direct_optab_p (type, DOT_PROD_EXPR, half_type,
                                        type_out, &half_vectype))
-    return NULL;
+    {
+      /* We won't be able to expand DOT_PROD_EXPR with the current vector size,
+        try WIDEN_MULT_PLUS_EXPR instead.  */
+      code = WIDEN_MULT_PLUS_EXPR;
+      if (!vect_supportable_widen_optab_p (type, code, half_type,
+                                          type_out, &half_vectype))
+        return NULL;
+    }
 
   /* Get the inputs in the appropriate types.  */
   tree mult_oprnd[2];
@@ -963,7 +1014,7 @@ vect_recog_dot_prod_pattern (stmt_vec_in
                       unprom0, half_vectype);
 
   var = vect_recog_temp_ssa_var (type, NULL);
-  pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
+  pattern_stmt = gimple_build_assign (var, code,
                                      mult_oprnd[0], mult_oprnd[1], oprnd1);
 
   return pattern_stmt;
@@ -1440,12 +1491,27 @@ vect_recog_widen_sum_pattern (stmt_vec_i
 
   vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);
 
-  if (!vect_supportable_direct_optab_p (type, WIDEN_SUM_EXPR, unprom0.type,
-                                       type_out))
-    return NULL;
+  tree half_vectype;
 
-  var = vect_recog_temp_ssa_var (type, NULL);
-  pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);
+  if (vect_supportable_direct_optab_p
+       (type, WIDEN_SUM_EXPR, unprom0.type, type_out))
+    {
+      var = vect_recog_temp_ssa_var (type, NULL);
+      pattern_stmt
+       = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);
+    }
+  /* If we won't be able to expand WIDEN_SUM_EXPR with the current
+     vector size, try WIDEN_MULT_PLUS_EXPR instead.  */
+  else if ((vect_supportable_widen_optab_p
+            (type, WIDEN_MULT_PLUS_EXPR, unprom0.type, type_out, &half_vectype)))
+    {
+      var = vect_recog_temp_ssa_var (type, NULL);
+      tree one = build_int_cst (unprom0.type, 1);
+      pattern_stmt = gimple_build_assign (var, WIDEN_MULT_PLUS_EXPR,
+                                         unprom0.op, one, oprnd1);
+    }
+  else
+    return NULL;
 
   return pattern_stmt;
 }
Index: gcc/tree-vect-stmts.c
===================================================================
--- gcc/tree-vect-stmts.c       (revision 266008)
+++ gcc/tree-vect-stmts.c       (working copy)
@@ -10638,7 +10638,11 @@ vect_get_vector_types_for_stmt (stmt_vec
                                   scalar_type);
 
   if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
-               GET_MODE_SIZE (TYPE_MODE (nunits_vectype))))
+               GET_MODE_SIZE (TYPE_MODE (nunits_vectype)))
+      /* Reductions that use a widening reduction would show
+        a mismatch but that's already been checked to be OK.  */
+      && STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def)
+
     return opt_result::failure_at (stmt,
                                   "not vectorized: different sized vector "
                                   "types in statement, %T and %T\n",

RFA: vectorizer patches 1/2 : WIDEN_MULT_PLUS support

Reply via email to