Re: [PATCH 1/4]middle-end Vect: Add support for dot-product where the sign for the multiplicant changes.

Richard Sandiford via Gcc-patches Mon, 07 Jun 2021 03:11:34 -0700

Sorry for the slow response.

Tamar Christina <tamar.christ...@arm.com> writes:
> […]
> diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
> index 
> 441d6cd28c4eaded7abd756164890dbcffd2f3b8..82123b96313e6783ea214b9259805d65c07d8858
>  100644
> --- a/gcc/tree-vect-patterns.c
> +++ b/gcc/tree-vect-patterns.c
> @@ -201,7 +201,8 @@ vect_get_external_def_edge (vec_info *vinfo, tree var)
>  static bool
>  vect_supportable_direct_optab_p (vec_info *vinfo, tree otype, tree_code code,
>                                  tree itype, tree *vecotype_out,
> -                                tree *vecitype_out = NULL)
> +                                tree *vecitype_out = NULL,
> +                                enum optab_subtype subtype = optab_default)
>  {
>    tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
>    if (!vecitype)
> @@ -211,7 +212,7 @@ vect_supportable_direct_optab_p (vec_info *vinfo, tree 
> otype, tree_code code,
>    if (!vecotype)
>      return false;
>
> -  optab optab = optab_for_tree_code (code, vecitype, optab_default);
> +  optab optab = optab_for_tree_code (code, vecitype, subtype);
>    if (!optab)
>      return false;
>
> @@ -487,10 +488,14 @@ vect_joust_widened_integer (tree type, bool shift_p, 
> tree op,
>  }
>
>  /* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
> -   is narrower than type, storing the supertype in *COMMON_TYPE if so.  */
> +   is narrower than type, storing the supertype in *COMMON_TYPE if so.
> +   If UNPROM_TYPE then accept that *COMMON_TYPE and NEW_TYPE may be of
> +   different signs but equal precision and that the resulting
> +   multiplication of them be compatible with UNPROM_TYPE.   */
>
>  static bool
> -vect_joust_widened_type (tree type, tree new_type, tree *common_type)
> +vect_joust_widened_type (tree type, tree new_type, tree *common_type,
> +                        tree unprom_type = NULL)
>  {
>    if (types_compatible_p (*common_type, new_type))
>      return true;
> @@ -514,7 +519,18 @@ vect_joust_widened_type (tree type, tree new_type, tree 
> *common_type)
>    unsigned int precision = MAX (TYPE_PRECISION (*common_type),
>                                 TYPE_PRECISION (new_type));
>    precision *= 2;
> -  if (precision * 2 > TYPE_PRECISION (type))
> +
> +  /* Check if the mismatch is only in the sign and if we have
> +     UNPROM_TYPE then allow it if there is enough precision to
> +     not lose any information during the conversion.  */
> +  if (unprom_type
> +      && TYPE_SIGN (unprom_type) == SIGNED
> +      && tree_nop_conversion_p (*common_type, new_type))
> +       return true;
> +
> +  /* The resulting application is unsigned, check if we have enough
> +     precision to perform the operation.  */
> +  if (precision * 2 > TYPE_PRECISION (unprom_type ? unprom_type : type))
>      return false;
>
>    *common_type = build_nonstandard_integer_type (precision, false);
> @@ -532,6 +548,10 @@ vect_joust_widened_type (tree type, tree new_type, tree 
> *common_type)
>     to a type that (a) is narrower than the result of STMT_INFO and
>     (b) can hold all leaf operand values.
>
> +   If UNPROM_TYPE then allow that the signs of the operands
> +   may differ in signs but not in precision and that the resulting type
> +   of the operation on the operands is compatible with UNPROM_TYPE.
> +
>     Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
>     exists.  */
>
> @@ -539,7 +559,8 @@ static unsigned int
>  vect_widened_op_tree (vec_info *vinfo, stmt_vec_info stmt_info, tree_code 
> code,
>                       tree_code widened_code, bool shift_p,
>                       unsigned int max_nops,
> -                     vect_unpromoted_value *unprom, tree *common_type)
> +                     vect_unpromoted_value *unprom, tree *common_type,
> +                     tree unprom_type = NULL)
>  {
>    /* Check for an integer operation with the right code.  */
>    gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
> @@ -600,7 +621,8 @@ vect_widened_op_tree (vec_info *vinfo, stmt_vec_info 
> stmt_info, tree_code code,
>                 = vinfo->lookup_def (this_unprom->op);
>               nops = vect_widened_op_tree (vinfo, def_stmt_info, code,
>                                            widened_code, shift_p, max_nops,
> -                                          this_unprom, common_type);
> +                                          this_unprom, common_type,
> +                                          unprom_type);
>               if (nops == 0)
>                 return 0;
>
> @@ -617,7 +639,7 @@ vect_widened_op_tree (vec_info *vinfo, stmt_vec_info 
> stmt_info, tree_code code,
>               if (i == 0)
>                 *common_type = this_unprom->type;
>               else if (!vect_joust_widened_type (type, this_unprom->type,
> -                                                common_type))
> +                                                common_type, unprom_type))
>                 return 0;
>             }
>         }
> @@ -799,12 +821,15 @@ vect_convert_input (vec_info *vinfo, stmt_vec_info 
> stmt_info, tree type,
>  }
>
>  /* Invoke vect_convert_input for N elements of UNPROM and store the
> -   result in the corresponding elements of RESULT.  */
> +   result in the corresponding elements of RESULT.
> +
> +   If ALLOW_SHORT_SIGN_MISMATCH then don't convert the types if they only
> +   differ by sign.  */
>
>  static void
>  vect_convert_inputs (vec_info *vinfo, stmt_vec_info stmt_info, unsigned int 
> n,
>                      tree *result, tree type, vect_unpromoted_value *unprom,
> -                    tree vectype)
> +                    tree vectype, bool allow_short_sign_mismatch = false)
>  {
>    for (unsigned int i = 0; i < n; ++i)
>      {
> @@ -812,8 +837,12 @@ vect_convert_inputs (vec_info *vinfo, stmt_vec_info 
> stmt_info, unsigned int n,
>        for (j = 0; j < i; ++j)
>         if (unprom[j].op == unprom[i].op)
>           break;
> +
>        if (j < i)
>         result[i] = result[j];
> +      else if (allow_short_sign_mismatch
> +              && tree_nop_conversion_p (type, unprom[i].type))
> +       result[i] = unprom[i].op;
>        else
>         result[i] = vect_convert_input (vinfo, stmt_info,
>                                         type, &unprom[i], vectype);
> @@ -888,21 +917,24 @@ vect_reassociating_reduction_p (vec_info *vinfo,
>
>     Try to find the following pattern:
>
> -     type x_t, y_t;
> +     type1a x_t
> +     type1b y_t;
>       TYPE1 prod;
>       TYPE2 sum = init;
>     loop:
>       sum_0 = phi <init, sum_1>
>       S1  x_t = ...
>       S2  y_t = ...
> -     S3  x_T = (TYPE1) x_t;
> -     S4  y_T = (TYPE1) y_t;
> +     S3  x_T = (TYPE3) x_t;
> +     S4  y_T = (TYPE4) y_t;
>       S5  prod = x_T * y_T;
>       [S6  prod = (TYPE2) prod;  #optional]
>       S7  sum_1 = prod + sum_0;
>
> -   where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is 
> the
> -   same size of 'TYPE1' or bigger. This is a special case of a reduction
> +   where 'TYPE1' is exactly double the size of type 'type1a' and 'type1b',
> +   the sign of 'TYPE1' must be one of 'type1a' or 'type1b' but the sign of
> +   'type1a' and 'type1b' can differ. 'TYPE2' is the same size of 'TYPE1' or
> +   bigger and must be the same sign. This is a special case of a reduction
>     computation.


What are TYPE3 and TYPE4 in the above?  AFAICT the x_T and y_T casts
should still be to TYPE1, since the types of x_T and y_T need to agree.

The sign of TYPE2 shouldn't matter, since TYPE2 is only used for
the addition.

> @@ -939,15 +971,16 @@ vect_recog_dot_prod_pattern (vec_info *vinfo,
>
>    /* Look for the following pattern
>            DX = (TYPE1) X;
> -          DY = (TYPE1) Y;
> +         DY = (TYPE2) Y;
>            DPROD = DX * DY;
> -          DDPROD = (TYPE2) DPROD;
> +         DDPROD = (TYPE3) DPROD;
>            sum_1 = DDPROD + sum_0;
>       In which
>       - DX is double the size of X
>       - DY is double the size of Y
>       - DX, DY, DPROD all have the same type but the sign
> -       between DX, DY and DPROD can differ.
> +       between DX, DY and DPROD can differ. The sign of DPROD
> +       is one of the signs of DX or DY.
>       - sum is the same size of DPROD or bigger
>       - sum has been recognized as a reduction variable.

These changes don't look right: DY has to be the same type as DX.
(What's different with usdot is that X and Y can be different signs.)

> @@ -986,20 +1019,29 @@ vect_recog_dot_prod_pattern (vec_info *vinfo,
>       inside the loop (in case we are analyzing an outer-loop).  */
>    vect_unpromoted_value unprom0[2];
>    if (!vect_widened_op_tree (vinfo, mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
> -                            false, 2, unprom0, &half_type))
> +                            false, 2, unprom0, &half_type,
> +                            TREE_TYPE (unprom_mult.op)))
>      return NULL;
>
> +  /* Check to see if there is a sign change happening in the operands of the
> +     multiplication and pick the appropriate optab subtype.  */
> +  enum optab_subtype subtype;
> +  if (TYPE_SIGN (unprom0[0].type) == TYPE_SIGN (unprom0[1].type))
> +    subtype = optab_default;
> +  else
> +    subtype = optab_vector_mixed_sign;
> +

Doesn't this check the signs of the uncast operands?  What really matters
is how things stand after the result of the (possible) casts to half_type.

E.g.:

   signed short x;
   unsigned char y;
   int z;

   z = (int) x * (int) y + z;

is an sdot operation with half_type signed short, rather than a usdot
operation.

How about instead passing a optab_subtype* to vect_widened_op_tree, in
place of the unprom_mult.op type?  When this optab_subtype* is nonnull,
the joust operation is allowed to fail as long as:

  tree_nop_conversion_p (this_unprom->type, common_type)

is true.  vect_widened_op_tree would set the optab_subtype to
optab_vector_mixed_sign to indicate this case.

We should make sure that we handle:

   unsigned short x;
   signed char y;
   int z;

   z = (int) x * (int) y + z;

correctly though: this should be a usdot operation in which y
is cast to signed short.  I'm not sure whether the patch would
insert the needed cast.

Thanks,
Richard

>    vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);
>
>    tree half_vectype;
>    if (!vect_supportable_direct_optab_p (vinfo, type, DOT_PROD_EXPR, 
> half_type,
> -                                       type_out, &half_vectype))
> +                                       type_out, &half_vectype, subtype))
>      return NULL;
>
>    /* Get the inputs in the appropriate types.  */
>    tree mult_oprnd[2];
>    vect_convert_inputs (vinfo, stmt_vinfo, 2, mult_oprnd, half_type,
> -                      unprom0, half_vectype);
> +                      unprom0, half_vectype, true);
>
>    var = vect_recog_temp_ssa_var (type, NULL);
>    pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,

Re: [PATCH 1/4]middle-end Vect: Add support for dot-product where the sign for the multiplicant changes.

Reply via email to