Hi Richard, I've attached a new version of the patch with the changes. I have also added 7 new tests in the testsuite to check the cases you mentioned.
Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.
Ok for master?
Thanks,
Tamar
gcc/ChangeLog:
* optabs.def (usdot_prod_optab): New.
* doc/md.texi: Document it and clarify other dot prod optabs.
* optabs-tree.h (enum optab_subtype): Add optab_vector_mixed_sign.
* optabs-tree.c (optab_for_tree_code): Support usdot_prod_optab.
* optabs.c (expand_widen_pattern_expr): Likewise.
* tree-cfg.c (verify_gimple_assign_ternary): Likewise.
* tree-vect-loop.c (vectorizable_reduction): Query dot-product kind.
* tree-vect-patterns.c (vect_supportable_direct_optab_p): Take optional
optab subtype.
(vect_widened_op_tree): Optionally allow operands whose types differ
only in sign.
(vect_recog_dot_prod_pattern): Support usdot_prod_optab.
--- inline copy of patch ---
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index
00caf3844ccf8ea289d581839766502d51b9e8d7..1356afb7f903f17c198103562b5cd145ecb9966f
100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -5446,13 +5446,55 @@ Like @samp{fold_left_plus_@var{m}}, but takes an
additional mask operand
@cindex @code{sdot_prod@var{m}} instruction pattern
@item @samp{sdot_prod@var{m}}
+
+Compute the sum of the products of two signed elements.
+Operand 1 and operand 2 are of the same mode. Their
+product, which is of a wider mode, is computed and added to operand 3.
+Operand 3 is of a mode equal or wider than the mode of the product. The
+result is placed in operand 0, which is of the same mode as operand 3.
+
+Semantically the expressions perform the multiplication in the following signs
+
+@smallexample
+sdot<signed c, signed a, signed b> ==
+ res = sign-ext (a) * sign-ext (b) + c
+@dots{}
+@end smallexample
+
@cindex @code{udot_prod@var{m}} instruction pattern
-@itemx @samp{udot_prod@var{m}}
-Compute the sum of the products of two signed/unsigned elements.
-Operand 1 and operand 2 are of the same mode. Their product, which is of a
-wider mode, is computed and added to operand 3. Operand 3 is of a mode equal or
-wider than the mode of the product. The result is placed in operand 0, which
-is of the same mode as operand 3.
+@item @samp{udot_prod@var{m}}
+
+Compute the sum of the products of two unsigned elements.
+Operand 1 and operand 2 are of the same mode. Their
+product, which is of a wider mode, is computed and added to operand 3.
+Operand 3 is of a mode equal or wider than the mode of the product. The
+result is placed in operand 0, which is of the same mode as operand 3.
+
+Semantically the expressions perform the multiplication in the following signs
+
+@smallexample
+udot<unsigned c, unsigned a, unsigned b> ==
+ res = zero-ext (a) * zero-ext (b) + c
+@dots{}
+@end smallexample
+
+
+
+@cindex @code{usdot_prod@var{m}} instruction pattern
+@item @samp{usdot_prod@var{m}}
+Compute the sum of the products of elements of different signs.
+Operand 1 must be unsigned and operand 2 signed. Their
+product, which is of a wider mode, is computed and added to operand 3.
+Operand 3 is of a mode equal or wider than the mode of the product. The
+result is placed in operand 0, which is of the same mode as operand 3.
+
+Semantically the expressions perform the multiplication in the following signs
+
+@smallexample
+usdot<unsigned c, unsigned a, signed b> ==
+ res = zero-ext (a) * ((unsigned-conv) sign-ext (b)) + c
+@dots{}
+@end smallexample
@cindex @code{ssad@var{m}} instruction pattern
@item @samp{ssad@var{m}}
diff --git a/gcc/optabs-tree.h b/gcc/optabs-tree.h
index
c3aaa1a416991e856d3e24da45968a92ebada82c..fbd2b06b8dbfd560dfb66b314830e6b564b37abb
100644
--- a/gcc/optabs-tree.h
+++ b/gcc/optabs-tree.h
@@ -29,7 +29,8 @@ enum optab_subtype
{
optab_default,
optab_scalar,
- optab_vector
+ optab_vector,
+ optab_vector_mixed_sign
};
/* Return the optab used for computing the given operation on the type given by
diff --git a/gcc/optabs-tree.c b/gcc/optabs-tree.c
index
95ffe397c23e80c105afea52e9d47216bf52f55a..eeb5aeed3202cc6971b6447994bc5311e9c010bb
100644
--- a/gcc/optabs-tree.c
+++ b/gcc/optabs-tree.c
@@ -127,7 +127,12 @@ optab_for_tree_code (enum tree_code code, const_tree type,
return TYPE_UNSIGNED (type) ? usum_widen_optab : ssum_widen_optab;
case DOT_PROD_EXPR:
- return TYPE_UNSIGNED (type) ? udot_prod_optab : sdot_prod_optab;
+ {
+ if (subtype == optab_vector_mixed_sign)
+ return usdot_prod_optab;
+
+ return (TYPE_UNSIGNED (type) ? udot_prod_optab : sdot_prod_optab);
+ }
case SAD_EXPR:
return TYPE_UNSIGNED (type) ? usad_optab : ssad_optab;
diff --git a/gcc/optabs.c b/gcc/optabs.c
index
62a6bdb4c59bf8263c499245795576199606d372..14d8ad2f33fd75388435fe912380e177f8f3c54b
100644
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -262,6 +262,11 @@ expand_widen_pattern_expr (sepops ops, rtx op0, rtx op1,
rtx wide_op,
bool sbool = false;
oprnd0 = ops->op0;
+ if (nops >= 2)
+ oprnd1 = ops->op1;
+ if (nops >= 3)
+ oprnd2 = ops->op2;
+
tmode0 = TYPE_MODE (TREE_TYPE (oprnd0));
if (ops->code == VEC_UNPACK_FIX_TRUNC_HI_EXPR
|| ops->code == VEC_UNPACK_FIX_TRUNC_LO_EXPR)
@@ -285,6 +290,27 @@ expand_widen_pattern_expr (sepops ops, rtx op0, rtx op1,
rtx wide_op,
? vec_unpacks_sbool_hi_optab : vec_unpacks_sbool_lo_optab);
sbool = true;
}
+ else if (ops->code == DOT_PROD_EXPR)
+ {
+ enum optab_subtype subtype = optab_default;
+ signop sign1 = TYPE_SIGN (TREE_TYPE (oprnd0));
+ signop sign2 = TYPE_SIGN (TREE_TYPE (oprnd1));
+ if (sign1 == sign2)
+ ;
+ else if (sign1 == SIGNED && sign2 == UNSIGNED)
+ {
+ subtype = optab_vector_mixed_sign;
+ /* usdot_prod wants the unsigned operand first, so flip the operands. */
+ std::swap (op0, op1);
+ }
+ else if (sign1 == UNSIGNED && sign2 == SIGNED)
+ subtype = optab_vector_mixed_sign;
+ else
+ gcc_unreachable ();
+
+ widen_pattern_optab
+ = optab_for_tree_code (ops->code, TREE_TYPE (oprnd0), subtype);
+ }
else
widen_pattern_optab
= optab_for_tree_code (ops->code, TREE_TYPE (oprnd0), optab_default);
@@ -298,10 +324,7 @@ expand_widen_pattern_expr (sepops ops, rtx op0, rtx op1,
rtx wide_op,
gcc_assert (icode != CODE_FOR_nothing);
if (nops >= 2)
- {
- oprnd1 = ops->op1;
- tmode1 = TYPE_MODE (TREE_TYPE (oprnd1));
- }
+ tmode1 = TYPE_MODE (TREE_TYPE (oprnd1));
else if (sbool)
{
nops = 2;
@@ -316,7 +339,6 @@ expand_widen_pattern_expr (sepops ops, rtx op0, rtx op1,
rtx wide_op,
{
gcc_assert (tmode1 == tmode0);
gcc_assert (op1);
- oprnd2 = ops->op2;
wmode = TYPE_MODE (TREE_TYPE (oprnd2));
}
diff --git a/gcc/optabs.def b/gcc/optabs.def
index
b192a9d070b8aa72e5676b2eaa020b5bdd7ffcc8..f470c2168378cec840edf7fbdb7c18615baae928
100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -352,6 +352,7 @@ OPTAB_D (uavg_ceil_optab, "uavg$a3_ceil")
OPTAB_D (sdot_prod_optab, "sdot_prod$I$a")
OPTAB_D (ssum_widen_optab, "widen_ssum$I$a3")
OPTAB_D (udot_prod_optab, "udot_prod$I$a")
+OPTAB_D (usdot_prod_optab, "usdot_prod$I$a")
OPTAB_D (usum_widen_optab, "widen_usum$I$a3")
OPTAB_D (usad_optab, "usad$I$a")
OPTAB_D (ssad_optab, "ssad$I$a")
diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c
index
02256580c986be426564adc1105ed2e1c69b0efc..f250f0fe99bec5278a0963e92bc1d2a61d9eee70
100644
--- a/gcc/tree-cfg.c
+++ b/gcc/tree-cfg.c
@@ -4412,7 +4412,8 @@ verify_gimple_assign_ternary (gassign *stmt)
&& !SCALAR_FLOAT_TYPE_P (rhs1_type))
|| (!INTEGRAL_TYPE_P (lhs_type)
&& !SCALAR_FLOAT_TYPE_P (lhs_type))))
- || !types_compatible_p (rhs1_type, rhs2_type)
+ /* rhs1_type and rhs2_type may differ in sign. */
+ || !tree_nop_conversion_p (rhs1_type, rhs2_type)
|| !useless_type_conversion_p (lhs_type, rhs3_type)
|| maybe_lt (GET_MODE_SIZE (element_mode (rhs3_type)),
2 * GET_MODE_SIZE (element_mode (rhs1_type))))
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index
ee79808472cea88786e5c04756980b456c3f5a02..d2accf3c35ade25e8d2ff4ee88136651e3e87c74
100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -6663,6 +6663,12 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
bool lane_reduc_code_p
= (code == DOT_PROD_EXPR || code == WIDEN_SUM_EXPR || code == SAD_EXPR);
int op_type = TREE_CODE_LENGTH (code);
+ enum optab_subtype optab_query_kind = optab_vector;
+ if (code == DOT_PROD_EXPR
+ && TYPE_SIGN (TREE_TYPE (gimple_assign_rhs1 (stmt)))
+ != TYPE_SIGN (TREE_TYPE (gimple_assign_rhs2 (stmt))))
+ optab_query_kind = optab_vector_mixed_sign;
+
scalar_dest = gimple_assign_lhs (stmt);
scalar_type = TREE_TYPE (scalar_dest);
@@ -7190,7 +7196,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
bool ok = true;
/* 4.1. check support for the operation in the loop */
- optab optab = optab_for_tree_code (code, vectype_in, optab_vector);
+ optab optab = optab_for_tree_code (code, vectype_in, optab_query_kind);
if (!optab)
{
if (dump_enabled_p ())
diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
index
c6b6feadb8d8d5cc57ded192cd68dd54b9185aef..77605e55dec7b4f6b0a1e1fdafa6313b987fa12c
100644
--- a/gcc/tree-vect-patterns.c
+++ b/gcc/tree-vect-patterns.c
@@ -191,9 +191,9 @@ vect_get_external_def_edge (vec_info *vinfo, tree var)
}
/* Return true if the target supports a vector version of CODE,
- where CODE is known to map to a direct optab. ITYPE specifies
- the type of (some of) the scalar inputs and OTYPE specifies the
- type of the scalar result.
+ where CODE is known to map to a direct optab with the given SUBTYPE.
+ ITYPE specifies the type of (some of) the scalar inputs and OTYPE
+ specifies the type of the scalar result.
If CODE allows the inputs and outputs to have different type
(such as for WIDEN_SUM_EXPR), it is the input mode rather
@@ -208,7 +208,8 @@ vect_get_external_def_edge (vec_info *vinfo, tree var)
static bool
vect_supportable_direct_optab_p (vec_info *vinfo, tree otype, tree_code code,
tree itype, tree *vecotype_out,
- tree *vecitype_out = NULL)
+ tree *vecitype_out = NULL,
+ enum optab_subtype subtype = optab_default)
{
tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
if (!vecitype)
@@ -218,7 +219,7 @@ vect_supportable_direct_optab_p (vec_info *vinfo, tree
otype, tree_code code,
if (!vecotype)
return false;
- optab optab = optab_for_tree_code (code, vecitype, optab_default);
+ optab optab = optab_for_tree_code (code, vecitype, subtype);
if (!optab)
return false;
@@ -521,6 +522,9 @@ vect_joust_widened_type (tree type, tree new_type, tree
*common_type)
unsigned int precision = MAX (TYPE_PRECISION (*common_type),
TYPE_PRECISION (new_type));
precision *= 2;
+
+ /* The resulting application is unsigned, check if we have enough
+ precision to perform the operation. */
if (precision * 2 > TYPE_PRECISION (type))
return false;
@@ -539,6 +543,10 @@ vect_joust_widened_type (tree type, tree new_type, tree
*common_type)
to a type that (a) is narrower than the result of STMT_INFO and
(b) can hold all leaf operand values.
+ If SUBTYPE is nonnull, allow the operands to differ in sign but not
+ in precision; *SUBTYPE is set to optab_vector_mixed_sign when such a
+ mismatch is accepted.
+
Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
exists. */
@@ -546,7 +554,8 @@ static unsigned int
vect_widened_op_tree (vec_info *vinfo, stmt_vec_info stmt_info, tree_code code,
tree_code widened_code, bool shift_p,
unsigned int max_nops,
- vect_unpromoted_value *unprom, tree *common_type)
+ vect_unpromoted_value *unprom, tree *common_type,
+ enum optab_subtype *subtype = NULL)
{
/* Check for an integer operation with the right code. */
gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
@@ -607,7 +616,8 @@ vect_widened_op_tree (vec_info *vinfo, stmt_vec_info
stmt_info, tree_code code,
= vinfo->lookup_def (this_unprom->op);
nops = vect_widened_op_tree (vinfo, def_stmt_info, code,
widened_code, shift_p, max_nops,
- this_unprom, common_type);
+ this_unprom, common_type,
+ subtype);
if (nops == 0)
return 0;
@@ -625,7 +635,24 @@ vect_widened_op_tree (vec_info *vinfo, stmt_vec_info
stmt_info, tree_code code,
*common_type = this_unprom->type;
else if (!vect_joust_widened_type (type, this_unprom->type,
common_type))
- return 0;
+ {
+ if (subtype)
+ {
+ tree new_type = *common_type;
+ /* See if we can sign extend the smaller type. */
+ if (TYPE_PRECISION (this_unprom->type) > TYPE_PRECISION
(new_type)
+ && (TYPE_UNSIGNED (this_unprom->type) &&
!TYPE_UNSIGNED (new_type)))
+ new_type = build_nonstandard_integer_type
(TYPE_PRECISION (this_unprom->type), true);
+
+ if (tree_nop_conversion_p (this_unprom->type, new_type))
+ {
+ *subtype = optab_vector_mixed_sign;
+ *common_type = new_type;
+ }
+ }
+ else
+ return 0;
+ }
}
}
next_op += nops;
@@ -806,12 +833,15 @@ vect_convert_input (vec_info *vinfo, stmt_vec_info
stmt_info, tree type,
}
/* Invoke vect_convert_input for N elements of UNPROM and store the
- result in the corresponding elements of RESULT. */
+ result in the corresponding elements of RESULT.
+
+ If SUBTYPE is optab_vector_mixed_sign, leave unconverted any input
+ whose type differs from TYPE only in sign. */
static void
vect_convert_inputs (vec_info *vinfo, stmt_vec_info stmt_info, unsigned int n,
tree *result, tree type, vect_unpromoted_value *unprom,
- tree vectype)
+ tree vectype, enum optab_subtype subtype = optab_default)
{
for (unsigned int i = 0; i < n; ++i)
{
@@ -819,8 +849,12 @@ vect_convert_inputs (vec_info *vinfo, stmt_vec_info
stmt_info, unsigned int n,
for (j = 0; j < i; ++j)
if (unprom[j].op == unprom[i].op)
break;
+
if (j < i)
result[i] = result[j];
+ else if (subtype == optab_vector_mixed_sign
+ && tree_nop_conversion_p (type, unprom[i].type))
+ result[i] = unprom[i].op;
else
result[i] = vect_convert_input (vinfo, stmt_info,
type, &unprom[i], vectype);
@@ -895,7 +929,8 @@ vect_reassociating_reduction_p (vec_info *vinfo,
Try to find the following pattern:
- type x_t, y_t;
+ type1a x_t;
+ type1b y_t;
TYPE1 prod;
TYPE2 sum = init;
loop:
@@ -908,8 +943,10 @@ vect_reassociating_reduction_p (vec_info *vinfo,
[S6 prod = (TYPE2) prod; #optional]
S7 sum_1 = prod + sum_0;
- where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
- same size of 'TYPE1' or bigger. This is a special case of a reduction
+ where 'TYPE1' is exactly double the size of 'type1a' and 'type1b';
+ the sign of 'TYPE1' must match that of 'type1a' or 'type1b', but the signs
+ of 'type1a' and 'type1b' can differ. 'TYPE2' is the same size as 'TYPE1' or
+ bigger and must have the same sign. This is a special case of a reduction
computation.
Input:
@@ -946,15 +983,15 @@ vect_recog_dot_prod_pattern (vec_info *vinfo,
/* Look for the following pattern
DX = (TYPE1) X;
- DY = (TYPE1) Y;
+ DY = (TYPE1) Y;
DPROD = DX * DY;
- DDPROD = (TYPE2) DPROD;
+ DDPROD = (TYPE2) DPROD;
sum_1 = DDPROD + sum_0;
In which
- DX is double the size of X
- DY is double the size of Y
- DX, DY, DPROD all have the same type but the sign
- between DX, DY and DPROD can differ.
+ between X, Y and DPROD can differ.
- sum is the same size of DPROD or bigger
- sum has been recognized as a reduction variable.
@@ -992,21 +1029,27 @@ vect_recog_dot_prod_pattern (vec_info *vinfo,
/* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
inside the loop (in case we are analyzing an outer-loop). */
vect_unpromoted_value unprom0[2];
+ enum optab_subtype subtype = optab_vector;
if (!vect_widened_op_tree (vinfo, mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
- false, 2, unprom0, &half_type))
+ false, 2, unprom0, &half_type, &subtype))
+ return NULL;
+
+ if (subtype == optab_vector_mixed_sign
+ && TYPE_UNSIGNED (unprom_mult.type)
+ && TYPE_PRECISION (half_type) * 4 > TYPE_PRECISION (unprom_mult.type))
return NULL;
vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);
tree half_vectype;
if (!vect_supportable_direct_optab_p (vinfo, type, DOT_PROD_EXPR, half_type,
- type_out, &half_vectype))
+ type_out, &half_vectype, subtype))
return NULL;
/* Get the inputs in the appropriate types. */
tree mult_oprnd[2];
vect_convert_inputs (vinfo, stmt_vinfo, 2, mult_oprnd, half_type,
- unprom0, half_vectype);
+ unprom0, half_vectype, subtype);
var = vect_recog_temp_ssa_var (type, NULL);
pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
rb14433.patch
Description: rb14433.patch
