Sorry for the slow review. Just concentrating on tree-vect-patterns.c, as before:
Tamar Christina <tamar.christ...@arm.com> writes: > @@ -521,6 +522,9 @@ vect_joust_widened_type (tree type, tree new_type, tree > *common_type) > unsigned int precision = MAX (TYPE_PRECISION (*common_type), > TYPE_PRECISION (new_type)); > precision *= 2; > + > + /* The resulting application is unsigned, check if we have enough > + precision to perform the operation. */ > if (precision * 2 > TYPE_PRECISION (type)) > return false; > Not sure what the comment means by “application” here, but the common type we pick is signed rather than unsigned. > @@ -546,7 +554,8 @@ static unsigned int > vect_widened_op_tree (vec_info *vinfo, stmt_vec_info stmt_info, tree_code > code, > tree_code widened_code, bool shift_p, > unsigned int max_nops, > - vect_unpromoted_value *unprom, tree *common_type) > + vect_unpromoted_value *unprom, tree *common_type, > + enum optab_subtype *subtype = NULL) > { > /* Check for an integer operation with the right code. */ > gassign *assign = dyn_cast <gassign *> (stmt_info->stmt); > @@ -607,7 +616,8 @@ vect_widened_op_tree (vec_info *vinfo, stmt_vec_info > stmt_info, tree_code code, > = vinfo->lookup_def (this_unprom->op); > nops = vect_widened_op_tree (vinfo, def_stmt_info, code, > widened_code, shift_p, max_nops, > - this_unprom, common_type); > + this_unprom, common_type, > + subtype); > if (nops == 0) > return 0; > > @@ -625,7 +635,24 @@ vect_widened_op_tree (vec_info *vinfo, stmt_vec_info > stmt_info, tree_code code, > *common_type = this_unprom->type; > else if (!vect_joust_widened_type (type, this_unprom->type, > common_type)) > - return 0; > + { > + if (subtype) > + { AIUI, if we get here then: - there must be one unsigned operand (A) of precision P - there must be one signed operand (B) with precision <= P - we can't extend to precision 2*P A conversion is needed if B's precision is < P. That conversion should be to a signed type with precision P. So… > + tree new_type = *common_type; > + /* See if we can sign extend the smaller type. */ > + if (TYPE_PRECISION (this_unprom->type) > TYPE_PRECISION > (new_type) > + && (TYPE_UNSIGNED (this_unprom->type) && > !TYPE_UNSIGNED (new_type))) …I think this second line could be an assert and > + new_type = build_nonstandard_integer_type > (TYPE_PRECISION (this_unprom->type), true); …picking an unsigned type here looks wrong. The net effect would be to convert B (the previous signed operand) to an unsigned type. > + > + if (tree_nop_conversion_p (this_unprom->type, new_type)) > + { > + *subtype = optab_vector_mixed_sign; > + *common_type = new_type; > + } IMO the sign of the common type shouldn't matter for optab_vector_mixed_sign: if we need to convert operands later, it should be to the precision of the common type but retaining the sign of the original type. So I think it would be simpler to do: if (TYPE_PRECISION (this_unprom->type) > TYPE_PRECISION (*common_type) *common_type = this_unprom->type; *subtype = optab_vector_mixed_sign; here and adjust the conversion code as described below. This also has the advantage of coping with > 2 operands, in case that ever becomes important in future. > @@ -806,12 +833,15 @@ vect_convert_input (vec_info *vinfo, stmt_vec_info > stmt_info, tree type, > } > > /* Invoke vect_convert_input for N elements of UNPROM and store the > - result in the corresponding elements of RESULT. */ > + result in the corresponding elements of RESULT. > + > + If SUBTYPE then don't convert the types if they only > + differ by sign. */ > > static void > vect_convert_inputs (vec_info *vinfo, stmt_vec_info stmt_info, unsigned int > n, > tree *result, tree type, vect_unpromoted_value *unprom, > - tree vectype) > + tree vectype, enum optab_subtype subtype = optab_default) > { > for (unsigned int i = 0; i < n; ++i) > { > @@ -819,8 +849,12 @@ vect_convert_inputs (vec_info *vinfo, stmt_vec_info > stmt_info, unsigned int n, > for (j = 0; j < i; ++j) > if (unprom[j].op == unprom[i].op) > break; > + > if (j < i) > result[i] = result[j]; > + else if (subtype == optab_vector_mixed_sign > + && tree_nop_conversion_p (type, unprom[i].type)) > + result[i] = unprom[i].op; > else > result[i] = vect_convert_input (vinfo, stmt_info, > type, &unprom[i], vectype); As noted above, I think we want to preserve the sign of the original type for optab_vector_mixed_sign, even if a conversion is needed. I think we should avoid the special case above and instead push subtype down into vect_convert_input. We can then adjust the type at the head of that function: if (subtype == optab_vector_mixed_sign && TYPE_SIGN (type) != TYPE_SIGN (TREE_TYPE (unprom->op))) type = build_nonstandard_integer_type (TYPE_PRECISION (type), TYPE_SIGN (this_unprom->type)); > @@ -895,7 +929,8 @@ vect_reassociating_reduction_p (vec_info *vinfo, > > Try to find the following pattern: > > - type x_t, y_t; > + type1a x_t > + type1b y_t; > TYPE1 prod; > TYPE2 sum = init; > loop: > @@ -908,8 +943,10 @@ vect_reassociating_reduction_p (vec_info *vinfo, > [S6 prod = (TYPE2) prod; #optional] > S7 sum_1 = prod + sum_0; > > - where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is > the > - same size of 'TYPE1' or bigger. This is a special case of a reduction > + where 'TYPE1' is exactly double the size of type 'type1a' and 'type1b', > + the sign of 'TYPE1' must be one of 'type1a' or 'type1b' but the sign of > + 'type1a' and 'type1b' can differ. 'TYPE2' is the same size of 'TYPE1' or > + bigger and must be the same sign. This is a special case of a reduction This last bit isn't true: TYPE2 is the type of the addition and can be any sign. > computation. > > Input: > @@ -946,15 +983,15 @@ vect_recog_dot_prod_pattern (vec_info *vinfo, > > /* Look for the following pattern > DX = (TYPE1) X; > - DY = (TYPE1) Y; > + DY = (TYPE1) Y; > DPROD = DX * DY; > - DDPROD = (TYPE2) DPROD; > + DDPROD = (TYPE2) DPROD; > sum_1 = DDPROD + sum_0; Spurious whitespace changes: would be better to tabify the whole thing or leave it as-is. > In which > - DX is double the size of X > - DY is double the size of Y > - DX, DY, DPROD all have the same type but the sign > - between DX, DY and DPROD can differ. > + between X, Y and DPROD can differ. > - sum is the same size of DPROD or bigger > - sum has been recognized as a reduction variable. > > @@ -992,21 +1029,27 @@ vect_recog_dot_prod_pattern (vec_info *vinfo, > /* FORNOW. Can continue analyzing the def-use chain when this stmt in a > phi > inside the loop (in case we are analyzing an outer-loop). */ > vect_unpromoted_value unprom0[2]; > + enum optab_subtype subtype = optab_vector; > if (!vect_widened_op_tree (vinfo, mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR, > - false, 2, unprom0, &half_type)) > + false, 2, unprom0, &half_type, &subtype)) > + return NULL; > + > + if (subtype == optab_vector_mixed_sign > + && TYPE_UNSIGNED (unprom_mult.type) > + && TYPE_PRECISION (half_type) * 4 > TYPE_PRECISION (unprom_mult.type)) > return NULL; Isn't the final condition here instead that TYPE1 is narrower than TYPE2? I.e. we need to reject the case in which we multiply a signed and an unsigned value to get a (logically) signed result, but then zero-extend it (rather than sign-extend it) to the precision of the addition. That would make the test: if (subtype == optab_vector_mixed_sign && TYPE_UNSIGNED (unprom_mult.type) && TYPE_PRECISION (unprom_mult.type) < TYPE_PRECISION (type)) return NULL; instead. Thanks, Richard