Sorry for the slow response. Tamar Christina <tamar.christ...@arm.com> writes: > […] > diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c > index > 441d6cd28c4eaded7abd756164890dbcffd2f3b8..82123b96313e6783ea214b9259805d65c07d8858 > 100644 > --- a/gcc/tree-vect-patterns.c > +++ b/gcc/tree-vect-patterns.c > @@ -201,7 +201,8 @@ vect_get_external_def_edge (vec_info *vinfo, tree var) > static bool > vect_supportable_direct_optab_p (vec_info *vinfo, tree otype, tree_code code, > tree itype, tree *vecotype_out, > - tree *vecitype_out = NULL) > + tree *vecitype_out = NULL, > + enum optab_subtype subtype = optab_default) > { > tree vecitype = get_vectype_for_scalar_type (vinfo, itype); > if (!vecitype) > @@ -211,7 +212,7 @@ vect_supportable_direct_optab_p (vec_info *vinfo, tree > otype, tree_code code, > if (!vecotype) > return false; > > - optab optab = optab_for_tree_code (code, vecitype, optab_default); > + optab optab = optab_for_tree_code (code, vecitype, subtype); > if (!optab) > return false; > > @@ -487,10 +488,14 @@ vect_joust_widened_integer (tree type, bool shift_p, > tree op, > } > > /* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE > - is narrower than type, storing the supertype in *COMMON_TYPE if so. */ > + is narrower than type, storing the supertype in *COMMON_TYPE if so. > + If UNPROM_TYPE then accept that *COMMON_TYPE and NEW_TYPE may be of > + different signs but equal precision and that the resulting > + multiplication of them be compatible with UNPROM_TYPE. 
*/ > > static bool > -vect_joust_widened_type (tree type, tree new_type, tree *common_type) > +vect_joust_widened_type (tree type, tree new_type, tree *common_type, > + tree unprom_type = NULL) > { > if (types_compatible_p (*common_type, new_type)) > return true; > @@ -514,7 +519,18 @@ vect_joust_widened_type (tree type, tree new_type, tree > *common_type) > unsigned int precision = MAX (TYPE_PRECISION (*common_type), > TYPE_PRECISION (new_type)); > precision *= 2; > - if (precision * 2 > TYPE_PRECISION (type)) > + > + /* Check if the mismatch is only in the sign and if we have > + UNPROM_TYPE then allow it if there is enough precision to > + not lose any information during the conversion. */ > + if (unprom_type > + && TYPE_SIGN (unprom_type) == SIGNED > + && tree_nop_conversion_p (*common_type, new_type)) > + return true; > + > + /* The resulting application is unsigned, check if we have enough > + precision to perform the operation. */ > + if (precision * 2 > TYPE_PRECISION (unprom_type ? unprom_type : type)) > return false; > > *common_type = build_nonstandard_integer_type (precision, false); > @@ -532,6 +548,10 @@ vect_joust_widened_type (tree type, tree new_type, tree > *common_type) > to a type that (a) is narrower than the result of STMT_INFO and > (b) can hold all leaf operand values. > > + If UNPROM_TYPE then allow that the signs of the operands > + may differ in signs but not in precision and that the resulting type > + of the operation on the operands is compatible with UNPROM_TYPE. > + > Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE > exists. */ > > @@ -539,7 +559,8 @@ static unsigned int > vect_widened_op_tree (vec_info *vinfo, stmt_vec_info stmt_info, tree_code > code, > tree_code widened_code, bool shift_p, > unsigned int max_nops, > - vect_unpromoted_value *unprom, tree *common_type) > + vect_unpromoted_value *unprom, tree *common_type, > + tree unprom_type = NULL) > { > /* Check for an integer operation with the right code. 
*/ > gassign *assign = dyn_cast <gassign *> (stmt_info->stmt); > @@ -600,7 +621,8 @@ vect_widened_op_tree (vec_info *vinfo, stmt_vec_info > stmt_info, tree_code code, > = vinfo->lookup_def (this_unprom->op); > nops = vect_widened_op_tree (vinfo, def_stmt_info, code, > widened_code, shift_p, max_nops, > - this_unprom, common_type); > + this_unprom, common_type, > + unprom_type); > if (nops == 0) > return 0; > > @@ -617,7 +639,7 @@ vect_widened_op_tree (vec_info *vinfo, stmt_vec_info > stmt_info, tree_code code, > if (i == 0) > *common_type = this_unprom->type; > else if (!vect_joust_widened_type (type, this_unprom->type, > - common_type)) > + common_type, unprom_type)) > return 0; > } > } > @@ -799,12 +821,15 @@ vect_convert_input (vec_info *vinfo, stmt_vec_info > stmt_info, tree type, > } > > /* Invoke vect_convert_input for N elements of UNPROM and store the > - result in the corresponding elements of RESULT. */ > + result in the corresponding elements of RESULT. > + > + If ALLOW_SHORT_SIGN_MISMATCH then don't convert the types if they only > + differ by sign. 
*/ > > static void > vect_convert_inputs (vec_info *vinfo, stmt_vec_info stmt_info, unsigned int > n, > tree *result, tree type, vect_unpromoted_value *unprom, > - tree vectype) > + tree vectype, bool allow_short_sign_mismatch = false) > { > for (unsigned int i = 0; i < n; ++i) > { > @@ -812,8 +837,12 @@ vect_convert_inputs (vec_info *vinfo, stmt_vec_info > stmt_info, unsigned int n, > for (j = 0; j < i; ++j) > if (unprom[j].op == unprom[i].op) > break; > + > if (j < i) > result[i] = result[j]; > + else if (allow_short_sign_mismatch > + && tree_nop_conversion_p (type, unprom[i].type)) > + result[i] = unprom[i].op; > else > result[i] = vect_convert_input (vinfo, stmt_info, > type, &unprom[i], vectype); > @@ -888,21 +917,24 @@ vect_reassociating_reduction_p (vec_info *vinfo, > > Try to find the following pattern: > > - type x_t, y_t; > + type1a x_t > + type1b y_t; > TYPE1 prod; > TYPE2 sum = init; > loop: > sum_0 = phi <init, sum_1> > S1 x_t = ... > S2 y_t = ... > - S3 x_T = (TYPE1) x_t; > - S4 y_T = (TYPE1) y_t; > + S3 x_T = (TYPE3) x_t; > + S4 y_T = (TYPE4) y_t; > S5 prod = x_T * y_T; > [S6 prod = (TYPE2) prod; #optional] > S7 sum_1 = prod + sum_0; > > - where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is > the > - same size of 'TYPE1' or bigger. This is a special case of a reduction > + where 'TYPE1' is exactly double the size of type 'type1a' and 'type1b', > + the sign of 'TYPE1' must be one of 'type1a' or 'type1b' but the sign of > + 'type1a' and 'type1b' can differ. 'TYPE2' is the same size of 'TYPE1' or > + bigger and must be the same sign. This is a special case of a reduction > computation.
What are TYPE3 and TYPE4 in the above? AFAICT the x_T and y_T casts should still be to TYPE1, since the types of x_T and y_T need to agree. The sign of TYPE2 shouldn't matter, since TYPE2 is only used for the addition. > @@ -939,15 +971,16 @@ vect_recog_dot_prod_pattern (vec_info *vinfo, > > /* Look for the following pattern > DX = (TYPE1) X; > - DY = (TYPE1) Y; > + DY = (TYPE2) Y; > DPROD = DX * DY; > - DDPROD = (TYPE2) DPROD; > + DDPROD = (TYPE3) DPROD; > sum_1 = DDPROD + sum_0; > In which > - DX is double the size of X > - DY is double the size of Y > - DX, DY, DPROD all have the same type but the sign > - between DX, DY and DPROD can differ. > + between DX, DY and DPROD can differ. The sign of DPROD > + is one of the signs of DX or DY. > - sum is the same size of DPROD or bigger > - sum has been recognized as a reduction variable. These changes don't look right: DY has to be the same type as DX. (What's different with usdot is that X and Y can have different signs.) > @@ -986,20 +1019,29 @@ vect_recog_dot_prod_pattern (vec_info *vinfo, > inside the loop (in case we are analyzing an outer-loop). */ > vect_unpromoted_value unprom0[2]; > if (!vect_widened_op_tree (vinfo, mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR, > - false, 2, unprom0, &half_type)) > + false, 2, unprom0, &half_type, > + TREE_TYPE (unprom_mult.op))) > return NULL; > > + /* Check to see if there is a sign change happening in the operands of the > + multiplication and pick the appropriate optab subtype. */ > + enum optab_subtype subtype; > + if (TYPE_SIGN (unprom0[0].type) == TYPE_SIGN (unprom0[1].type)) > + subtype = optab_default; > + else > + subtype = optab_vector_mixed_sign; > + Doesn't this check the signs of the uncast operands? What really matters is the signs of the operands after the (possible) casts to half_type. E.g.: signed short x; unsigned char y; int z; z = (int) x * (int) y + z; is an sdot operation with half_type signed short, rather than a usdot operation. 
How about instead passing an optab_subtype* to vect_widened_op_tree, in place of the unprom_mult.op type? When this optab_subtype* is nonnull, the joust operation is allowed to fail as long as: tree_nop_conversion_p (this_unprom->type, *common_type) is true. vect_widened_op_tree would set the optab_subtype to optab_vector_mixed_sign to indicate this case. We should make sure that we handle: unsigned short x; signed char y; int z; z = (int) x * (int) y + z; correctly though: this should be a usdot operation in which y is cast to signed short. I'm not sure whether the patch would insert the needed cast. Thanks, Richard > vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt); > > tree half_vectype; > if (!vect_supportable_direct_optab_p (vinfo, type, DOT_PROD_EXPR, > half_type, > - type_out, &half_vectype)) > + type_out, &half_vectype, subtype)) > return NULL; > > /* Get the inputs in the appropriate types. */ > tree mult_oprnd[2]; > vect_convert_inputs (vinfo, stmt_vinfo, 2, mult_oprnd, half_type, > - unprom0, half_vectype); > + unprom0, half_vectype, true); > > var = vect_recog_temp_ssa_var (type, NULL); > pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,