Sorry for the slow review.

Just concentrating on tree-vect-patterns.c, as before:

Tamar Christina <tamar.christ...@arm.com> writes:
> @@ -521,6 +522,9 @@ vect_joust_widened_type (tree type, tree new_type, tree 
> *common_type)
>    unsigned int precision = MAX (TYPE_PRECISION (*common_type),
>                               TYPE_PRECISION (new_type));
>    precision *= 2;
> +
> +  /* The resulting application is unsigned, check if we have enough
> +     precision to perform the operation.  */
>    if (precision * 2 > TYPE_PRECISION (type))
>      return false;
>  

Not sure what the comment means by “application” here, but the common
type we pick is signed rather than unsigned.

> @@ -546,7 +554,8 @@ static unsigned int
>  vect_widened_op_tree (vec_info *vinfo, stmt_vec_info stmt_info, tree_code 
> code,
>                     tree_code widened_code, bool shift_p,
>                     unsigned int max_nops,
> -                   vect_unpromoted_value *unprom, tree *common_type)
> +                   vect_unpromoted_value *unprom, tree *common_type,
> +                   enum optab_subtype *subtype = NULL)
>  {
>    /* Check for an integer operation with the right code.  */
>    gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
> @@ -607,7 +616,8 @@ vect_widened_op_tree (vec_info *vinfo, stmt_vec_info 
> stmt_info, tree_code code,
>               = vinfo->lookup_def (this_unprom->op);
>             nops = vect_widened_op_tree (vinfo, def_stmt_info, code,
>                                          widened_code, shift_p, max_nops,
> -                                        this_unprom, common_type);
> +                                        this_unprom, common_type,
> +                                        subtype);
>             if (nops == 0)
>               return 0;
>  
> @@ -625,7 +635,24 @@ vect_widened_op_tree (vec_info *vinfo, stmt_vec_info 
> stmt_info, tree_code code,
>               *common_type = this_unprom->type;
>             else if (!vect_joust_widened_type (type, this_unprom->type,
>                                                common_type))
> -             return 0;
> +             {
> +               if (subtype)
> +                 {

AIUI, if we get here then:

- there must be one unsigned operand (A) of precision P
- there must be one signed operand (B) with precision <= P
- we can't extend to precision 2*P 

A conversion is needed if B's precision is < P.
That conversion should be to a signed type with precision P.

So…

> +                   tree new_type = *common_type;
> +                   /* See if we can sign extend the smaller type.  */
> +                   if (TYPE_PRECISION (this_unprom->type) > TYPE_PRECISION 
> (new_type)
> +                       && (TYPE_UNSIGNED (this_unprom->type) && 
> !TYPE_UNSIGNED (new_type)))

…I think this second line could be an assert and

> +                     new_type = build_nonstandard_integer_type 
> (TYPE_PRECISION (this_unprom->type), true);

…picking an unsigned type here looks wrong.  The net effect would
be to convert B (the previous signed operand) to an unsigned type.

> +
> +                   if (tree_nop_conversion_p (this_unprom->type, new_type))
> +                     {
> +                       *subtype = optab_vector_mixed_sign;
> +                       *common_type = new_type;
> +                     }

IMO the sign of the common type shouldn't matter for optab_vector_mixed_sign:
if we need to convert operands later, it should be to the precision of
the common type but retaining the sign of the original type.
So I think it would be simpler to do:

                      if (TYPE_PRECISION (this_unprom->type)
                          > TYPE_PRECISION (*common_type)
                        *common_type = this_unprom->type;
                      *subtype = optab_vector_mixed_sign;

here and adjust the conversion code as described below.

This also has the advantage of coping with > 2 operands, in case that
ever becomes important in future.

> @@ -806,12 +833,15 @@ vect_convert_input (vec_info *vinfo, stmt_vec_info 
> stmt_info, tree type,
>  }
>  
>  /* Invoke vect_convert_input for N elements of UNPROM and store the
> -   result in the corresponding elements of RESULT.  */
> +   result in the corresponding elements of RESULT.
> +
> +   If SUBTYPE then don't convert the types if they only
> +   differ by sign.  */
>  
>  static void
>  vect_convert_inputs (vec_info *vinfo, stmt_vec_info stmt_info, unsigned int 
> n,
>                    tree *result, tree type, vect_unpromoted_value *unprom,
> -                  tree vectype)
> +                  tree vectype, enum optab_subtype subtype = optab_default)
>  {
>    for (unsigned int i = 0; i < n; ++i)
>      {
> @@ -819,8 +849,12 @@ vect_convert_inputs (vec_info *vinfo, stmt_vec_info 
> stmt_info, unsigned int n,
>        for (j = 0; j < i; ++j)
>       if (unprom[j].op == unprom[i].op)
>         break;
> +
>        if (j < i)
>       result[i] = result[j];
> +      else if (subtype == optab_vector_mixed_sign
> +            && tree_nop_conversion_p (type, unprom[i].type))
> +     result[i] = unprom[i].op;
>        else
>       result[i] = vect_convert_input (vinfo, stmt_info,
>                                       type, &unprom[i], vectype);

As noted above, I think we want to preserve the sign of the original
type for optab_vector_mixed_sign, even if a conversion is needed.
I think we should avoid the special case above and instead push
subtype down into vect_convert_input.  We can then adjust the
type at the head of that function:

  if (subtype == optab_vector_mixed_sign
      && TYPE_SIGN (type) != TYPE_SIGN (TREE_TYPE (unprom->op)))
    type = build_nonstandard_integer_type (TYPE_PRECISION (type),
                                           TYPE_SIGN (this_unprom->type));

> @@ -895,7 +929,8 @@ vect_reassociating_reduction_p (vec_info *vinfo,
>  
>     Try to find the following pattern:
>  
> -     type x_t, y_t;
> +     type1a x_t
> +     type1b y_t;
>       TYPE1 prod;
>       TYPE2 sum = init;
>     loop:
> @@ -908,8 +943,10 @@ vect_reassociating_reduction_p (vec_info *vinfo,
>       [S6  prod = (TYPE2) prod;  #optional]
>       S7  sum_1 = prod + sum_0;
>  
> -   where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is 
> the
> -   same size of 'TYPE1' or bigger. This is a special case of a reduction
> +   where 'TYPE1' is exactly double the size of type 'type1a' and 'type1b',
> +   the sign of 'TYPE1' must be one of 'type1a' or 'type1b' but the sign of
> +   'type1a' and 'type1b' can differ. 'TYPE2' is the same size of 'TYPE1' or
> +   bigger and must be the same sign. This is a special case of a reduction

This last bit isn't true: TYPE2 is the type of the addition and can be
any sign.

>     computation.
>  
>     Input:
> @@ -946,15 +983,15 @@ vect_recog_dot_prod_pattern (vec_info *vinfo,
>  
>    /* Look for the following pattern
>            DX = (TYPE1) X;
> -          DY = (TYPE1) Y;
> +       DY = (TYPE1) Y;
>            DPROD = DX * DY;
> -          DDPROD = (TYPE2) DPROD;
> +       DDPROD = (TYPE2) DPROD;
>            sum_1 = DDPROD + sum_0;

Spurious whitespace changes: would be better to tabify the whole thing
or leave it as-is.

>       In which
>       - DX is double the size of X
>       - DY is double the size of Y
>       - DX, DY, DPROD all have the same type but the sign
> -       between DX, DY and DPROD can differ.
> +       between X, Y and DPROD can differ.
>       - sum is the same size of DPROD or bigger
>       - sum has been recognized as a reduction variable.
>  
> @@ -992,21 +1029,27 @@ vect_recog_dot_prod_pattern (vec_info *vinfo,
>    /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a 
> phi
>       inside the loop (in case we are analyzing an outer-loop).  */
>    vect_unpromoted_value unprom0[2];
> +  enum optab_subtype subtype = optab_vector;
>    if (!vect_widened_op_tree (vinfo, mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
> -                          false, 2, unprom0, &half_type))
> +                          false, 2, unprom0, &half_type, &subtype))
> +    return NULL;
> +
> +  if (subtype == optab_vector_mixed_sign
> +      && TYPE_UNSIGNED (unprom_mult.type)
> +      && TYPE_PRECISION (half_type) * 4 > TYPE_PRECISION (unprom_mult.type))
>      return NULL;

Isn't the final condition here instead that TYPE1 is narrower than TYPE2?
I.e. we need to reject the case in which we multiply a signed and an
unsigned value to get a (logically) signed result, but then zero-extend
it (rather than sign-extend it) to the precision of the addition.

That would make the test:

  if (subtype == optab_vector_mixed_sign
      && TYPE_UNSIGNED (unprom_mult.type)
      && TYPE_PRECISION (unprom_mult.type) < TYPE_PRECISION (type))
    return NULL;    
  
instead.

Thanks,
Richard

Reply via email to