Our target (eSi-RISC) doesn't have DOT_PROD_EXPR or WIDEN_SUM_EXPR operations
in the standard vector modes; however, it has a vectorized WIDEN_MULT_PLUS_EXPR
implementation with a double-vector output, which works just as well, with a
little help from the compiler - as implemented in these patches.
Bootstrapped and regtested on i686-pc-linux-gnu.
2018-11-07 Joern Rennecke <joern.renne...@riscy-ip.com>
* tree-cfg.c (verify_gimple_assign_ternary):
Allow vector arguments to WIDEN_MULT_{PLUS,MINUS}_EXPR.
* tree-vect-data-refs.c (vect_get_smallest_scalar_type):
Treat WIDEN_MULT_PLUS_EXPR like WIDEN_SUM_EXPR.
* tree-vect-loop.c (get_initial_def_for_reduction): Likewise.
Get VECTYPE from STMT_VINFO_VECTYPE.
(vectorizable_reduction): Fix result vector type for
WIDEN_MULT_PLUS_EXPR.
Use optab_handler or convert_optab_handler as needed.
* tree-vect-patterns.c (vect_supportable_widen_optab_p): New function.
(vect_recog_dot_prod_pattern): If DOT_PROD_EXPR can't be expanded
directly, try to use WIDEN_MULT_PLUS_EXPR instead.
(vect_recog_widen_sum_pattern): If no WIDEN_SUM pattern is available,
try WIDEN_MULT_PLUS.
* tree-vect-stmts.c (vect_get_vector_types_for_stmt):
Allow vector size input/output mismatch for reduction.
Index: gcc/tree-cfg.c
===================================================================
--- gcc/tree-cfg.c (revision 266008)
+++ gcc/tree-cfg.c (working copy)
@@ -4162,6 +4162,22 @@ verify_gimple_assign_ternary (gassign *s
{
case WIDEN_MULT_PLUS_EXPR:
case WIDEN_MULT_MINUS_EXPR:
+ if (VECTOR_TYPE_P (lhs_type)
+ && VECTOR_TYPE_P (rhs1_type)
+ && VECTOR_TYPE_P (rhs2_type)
+ && VECTOR_TYPE_P (rhs3_type)
+ && !maybe_ne (TYPE_VECTOR_SUBPARTS (lhs_type),
+ TYPE_VECTOR_SUBPARTS (rhs1_type))
+ && !maybe_ne (TYPE_VECTOR_SUBPARTS (lhs_type),
+ TYPE_VECTOR_SUBPARTS (rhs2_type))
+ && !maybe_ne (TYPE_VECTOR_SUBPARTS (lhs_type),
+ TYPE_VECTOR_SUBPARTS (rhs3_type)))
+ {
+ lhs_type = TREE_TYPE (lhs_type);
+ rhs1_type = TREE_TYPE (rhs1_type);
+ rhs2_type = TREE_TYPE (rhs2_type);
+ rhs3_type = TREE_TYPE (rhs3_type);
+ }
if ((!INTEGRAL_TYPE_P (rhs1_type)
&& !FIXED_POINT_TYPE_P (rhs1_type))
|| !useless_type_conversion_p (rhs1_type, rhs2_type)
Index: gcc/tree-vect-data-refs.c
===================================================================
--- gcc/tree-vect-data-refs.c (revision 266008)
+++ gcc/tree-vect-data-refs.c (working copy)
@@ -135,6 +135,7 @@ vect_get_smallest_scalar_type (stmt_vec_
&& (gimple_assign_cast_p (assign)
|| gimple_assign_rhs_code (assign) == DOT_PROD_EXPR
|| gimple_assign_rhs_code (assign) == WIDEN_SUM_EXPR
+ || gimple_assign_rhs_code (assign) == WIDEN_MULT_PLUS_EXPR
|| gimple_assign_rhs_code (assign) == WIDEN_MULT_EXPR
|| gimple_assign_rhs_code (assign) == WIDEN_LSHIFT_EXPR
|| gimple_assign_rhs_code (assign) == FLOAT_EXPR))
Index: gcc/tree-vect-loop.c
===================================================================
--- gcc/tree-vect-loop.c (revision 266008)
+++ gcc/tree-vect-loop.c (working copy)
@@ -3978,7 +3978,7 @@ get_initial_def_for_reduction (stmt_vec_
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
tree scalar_type = TREE_TYPE (init_val);
- tree vectype = get_vectype_for_scalar_type (scalar_type);
+ tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
enum tree_code code = gimple_assign_rhs_code (stmt_vinfo->stmt);
tree def_for_init;
tree init_def;
@@ -4001,6 +4001,7 @@ get_initial_def_for_reduction (stmt_vec_
{
case WIDEN_SUM_EXPR:
case DOT_PROD_EXPR:
+ case WIDEN_MULT_PLUS_EXPR:
case SAD_EXPR:
case PLUS_EXPR:
case MINUS_EXPR:
@@ -6043,6 +6044,21 @@ vectorizable_reduction (stmt_vec_info st
slp_node_instance->reduc_phis = slp_node;
STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
+
+ stmt_vec_info reduc = STMT_VINFO_REDUC_DEF (stmt_info);
+ if (reduc
+ && (reduc = STMT_VINFO_RELATED_STMT (reduc))
+ && gimple_assign_rhs_code (reduc->stmt) == WIDEN_MULT_PLUS_EXPR)
+ {
+ poly_uint64 nelem
+ = (TYPE_VECTOR_SUBPARTS
+ (get_vectype_for_scalar_type
+ (TREE_TYPE (gimple_assign_rhs1 (reduc->stmt)))));
+ STMT_VINFO_VECTYPE (stmt_info)
+ = (build_vector_type
+ (TREE_TYPE (gimple_assign_lhs (reduc->stmt)), nelem));
+ }
+
return true;
}
@@ -6529,7 +6545,13 @@ vectorizable_reduction (stmt_vec_info st
return false;
}
- if (optab_handler (optab, vec_mode) == CODE_FOR_nothing)
+ enum insn_code icode;
+ if (convert_optab_p (optab))
+ icode
+ = convert_optab_handler (optab, TYPE_MODE (vectype_out), vec_mode);
+ else
+ icode = optab_handler (optab, vec_mode);
+ if (icode == CODE_FOR_nothing)
{
if (dump_enabled_p ())
dump_printf (MSG_NOTE, "op not supported by target.\n");
Index: gcc/tree-vect-patterns.c
===================================================================
--- gcc/tree-vect-patterns.c (revision 266008)
+++ gcc/tree-vect-patterns.c (working copy)
@@ -212,6 +212,49 @@ vect_supportable_direct_optab_p (tree ot
return true;
}
+/* Likewise for widening operations, like WIDEN_MULT_PLUS_EXPR:
+ Return true if the target supports a vector version of CODE,
+ where CODE is known to map to a 'conversion' optab. ITYPE specifies
+ the type of (some of) the (narrower) scalar inputs and OTYPE specifies
+ the type of the scalar result.
+
+ The number of elements of the input vector is the same as the number
+ of elements of the output vector, and Operand 0 of the target pattern
+ must match the latter.
+
+ When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
+ Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
+ is nonnull. */
+static bool
+vect_supportable_widen_optab_p (tree otype, tree_code code,
+ tree itype, tree *vecotype_out,
+ tree *vecitype_out = NULL)
+{
+ tree vecitype = get_vectype_for_scalar_type (itype);
+ if (!vecitype)
+ return false;
+
+ tree vecotype
+ = build_vector_type (otype, GET_MODE_NUNITS (TYPE_MODE (vecitype)));
+ if (!vecotype)
+ return false;
+
+ optab optab = optab_for_tree_code (code, vecitype, optab_default);
+ if (!optab)
+ return false;
+
+ insn_code icode
+ = convert_optab_handler (optab, TYPE_MODE (vecotype), TYPE_MODE (vecitype));
+ if (icode == CODE_FOR_nothing
+ || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
+ return false;
+
+ *vecotype_out = vecotype;
+ if (vecitype_out)
+ *vecitype_out = vecitype;
+ return true;
+}
+
/* Round bit precision PRECISION up to a full element. */
static unsigned int
@@ -953,9 +996,17 @@ vect_recog_dot_prod_pattern (stmt_vec_in
vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);
tree half_vectype;
+ tree_code code = DOT_PROD_EXPR;
if (!vect_supportable_direct_optab_p (type, DOT_PROD_EXPR, half_type,
type_out, &half_vectype))
- return NULL;
+ {
+ /* We won't be able to expand DOT_PROD_EXPR with the current vector size,
+ try WIDEN_MULT_PLUS_EXPR instead. */
+ code = WIDEN_MULT_PLUS_EXPR;
+ if (!vect_supportable_widen_optab_p (type, code, half_type,
+ type_out, &half_vectype))
+ return NULL;
+ }
/* Get the inputs in the appropriate types. */
tree mult_oprnd[2];
@@ -963,7 +1014,7 @@ vect_recog_dot_prod_pattern (stmt_vec_in
unprom0, half_vectype);
var = vect_recog_temp_ssa_var (type, NULL);
- pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
+ pattern_stmt = gimple_build_assign (var, code,
mult_oprnd[0], mult_oprnd[1], oprnd1);
return pattern_stmt;
@@ -1440,12 +1491,27 @@ vect_recog_widen_sum_pattern (stmt_vec_i
vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);
- if (!vect_supportable_direct_optab_p (type, WIDEN_SUM_EXPR, unprom0.type,
- type_out))
- return NULL;
+ tree half_vectype;
- var = vect_recog_temp_ssa_var (type, NULL);
- pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);
+ if (vect_supportable_direct_optab_p
+ (type, WIDEN_SUM_EXPR, unprom0.type, type_out))
+ {
+ var = vect_recog_temp_ssa_var (type, NULL);
+ pattern_stmt
+ = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);
+ }
+ /* If we won't be able to expand WIDEN_SUM_EXPR with the current
+ vector size, try WIDEN_MULT_PLUS_EXPR instead. */
+ else if ((vect_supportable_widen_optab_p
+ (type, WIDEN_MULT_PLUS_EXPR, unprom0.type, type_out, &half_vectype)))
+ {
+ var = vect_recog_temp_ssa_var (type, NULL);
+ tree one = build_int_cst (unprom0.type, 1);
+ pattern_stmt = gimple_build_assign (var, WIDEN_MULT_PLUS_EXPR,
+ unprom0.op, one, oprnd1);
+ }
+ else
+ return NULL;
return pattern_stmt;
}
Index: gcc/tree-vect-stmts.c
===================================================================
--- gcc/tree-vect-stmts.c (revision 266008)
+++ gcc/tree-vect-stmts.c (working copy)
@@ -10638,7 +10638,11 @@ vect_get_vector_types_for_stmt (stmt_vec
scalar_type);
if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
- GET_MODE_SIZE (TYPE_MODE (nunits_vectype))))
+ GET_MODE_SIZE (TYPE_MODE (nunits_vectype)))
+ /* Reductions that use a widening reduction would show
+ a mismatch but that's already been checked to be OK. */
+ && STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def)
+
return opt_result::failure_at (stmt,
"not vectorized: different sized vector "
"types in statement, %T and %T\n",