Joel Hutton <joel.hut...@arm.com> writes:
> Hi Richards,
>
> This patch adds support for the V8QI->V8HI case from widening vect
> patterns, as discussed, to target PR98772.

Thanks, the approach looks good to me.  Mostly just minor comments below.

> Bootstrapped and regression tested on aarch64.
>
> [aarch64][vect] Support V8QI->V8HI WIDEN_ patterns
>
> In the case where 8 out of every 16 elements are widened using a
> widening pattern and the next 8 are skipped, the patterns are not
> recognized.  This is because they are normally used in a pair, such as
> VEC_WIDEN_MINUS_HI/LO, to achieve a v16qi->v16hi conversion for example.
> This patch adds support for V8QI->V8HI patterns.
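For reference, the kind of loop the patch targets can be reduced to
something like the following (an illustration only; the actual
gcc.target/aarch64/pr98772.c test is not quoted in this thread).  Eight
8-bit elements are widened to 16 bits per iteration, so a single
V8QI->V8HI widen-minus suffices and no VEC_WIDEN_MINUS_HI/LO pair is
needed:

  #include <stdint.h>

  /* Only 8 narrow elements per iteration: half a V16QI input vector,
     but a full V8HI output vector.  */
  void
  widen_minus_8 (uint16_t *restrict d, const uint8_t *restrict a,
                 const uint8_t *restrict b)
  {
    for (int i = 0; i < 8; i++)
      d[i] = (uint16_t) a[i] - (uint16_t) b[i];
  }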
> gcc/ChangeLog:
>
>         PR tree-optimization/98772
>         * optabs-tree.c (supportable_convert_operation): Add case for
>         V8QI->V8HI.
>         * tree-vect-stmts.c (vect_create_vectorized_promotion_stmts): New
>         function to generate promotion stmts for V8QI->V8HI.
>         (vectorizable_conversion): Add case for V8QI->V8HI.
>
> gcc/testsuite/ChangeLog:
>
>         PR tree-optimization/98772
>         * gcc.target/aarch64/pr98772.c: New test.
>
> diff --git a/gcc/optabs-tree.c b/gcc/optabs-tree.c
> index c94073e3ed98f8c4cab65891f65dedebdb1ec274..b91ce3af6f0d4b3a62110bdb38f68ecc53765cad 100644
> --- a/gcc/optabs-tree.c
> +++ b/gcc/optabs-tree.c
> @@ -308,6 +308,40 @@ supportable_convert_operation (enum tree_code code,
>    if (!VECTOR_MODE_P (m1) || !VECTOR_MODE_P (m2))
>      return false;
>
> +  /* The case where a widening operation is not making use of the full
> +     width of the input vector, but is using the full width of the output
> +     vector.  Return the non-widened code, which will be used after the
> +     inputs are converted to the wide type.  */
> +  if ((code == WIDEN_MINUS_EXPR
> +      || code == WIDEN_PLUS_EXPR
> +      || code == WIDEN_MULT_EXPR
> +      || code == WIDEN_LSHIFT_EXPR)

Minor formatting nit, but the ||s should be indented one space more.

> +      && known_eq (TYPE_VECTOR_SUBPARTS (vectype_in),
> +                   TYPE_VECTOR_SUBPARTS (vectype_out)))
> +    {
> +      switch (code)
> +        {
> +        case WIDEN_LSHIFT_EXPR:
> +          *code1 = LSHIFT_EXPR;
> +          return true;
> +          break;
> +        case WIDEN_MINUS_EXPR:
> +          *code1 = MINUS_EXPR;
> +          return true;
> +          break;
> +        case WIDEN_PLUS_EXPR:
> +          *code1 = PLUS_EXPR;
> +          return true;
> +          break;
> +        case WIDEN_MULT_EXPR:
> +          *code1 = MULT_EXPR;
> +          return true;
> +          break;
> +        default:
> +          gcc_unreachable ();
> +        }
> +    }

Rather than return true, I think we should do something like:

  if (!supportable_convert_operation (NOP_EXPR, vectype_out, vectype_in,
                                      &dummy_code))
    return false;

  optab = optab_for_tree_code (*code1, vectype_out, optab_default);
  return (optab_handler (optab, TYPE_MODE (vectype_out))
          != CODE_FOR_nothing);

to make sure that the target really does support this.

AFAICT the caller always knows when it wants the “if” statement above
to be used.  What it's doing is a bit different from what
supportable_convert_operation normally does, so it might be better to
put it into a separate function that tests whether the target supports
the non-widening form of a widening operation.
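One possible shape for such a separate helper, assembled from the
fragments already quoted above (the name
supportable_half_widening_operation and its exact interface are
illustrative assumptions, not something the posted patch defines):

  /* Return true if the target supports the non-widening form of the
     widening operation CODE, where VECTYPE_IN and VECTYPE_OUT have the
     same number of elements.  On success, store the non-widening code
     in *CODE1.  */

  static bool
  supportable_half_widening_operation (enum tree_code code,
                                       tree vectype_out, tree vectype_in,
                                       enum tree_code *code1)
  {
    enum tree_code dummy_code;
    optab op;

    if (!VECTOR_MODE_P (TYPE_MODE (vectype_out))
        || !VECTOR_MODE_P (TYPE_MODE (vectype_in))
        || !known_eq (TYPE_VECTOR_SUBPARTS (vectype_in),
                      TYPE_VECTOR_SUBPARTS (vectype_out)))
      return false;

    switch (code)
      {
      case WIDEN_LSHIFT_EXPR:
        *code1 = LSHIFT_EXPR;
        break;
      case WIDEN_MINUS_EXPR:
        *code1 = MINUS_EXPR;
        break;
      case WIDEN_PLUS_EXPR:
        *code1 = PLUS_EXPR;
        break;
      case WIDEN_MULT_EXPR:
        *code1 = MULT_EXPR;
        break;
      default:
        return false;
      }

    /* The inputs must be widenable to the output type...  */
    if (!supportable_convert_operation (NOP_EXPR, vectype_out, vectype_in,
                                        &dummy_code))
      return false;

    /* ...and the non-widening operation must be supported on the wide
       mode.  */
    op = optab_for_tree_code (*code1, vectype_out, optab_default);
    return (optab_handler (op, TYPE_MODE (vectype_out)) != CODE_FOR_nothing);
  }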
> diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
> index f180ced312443ba1e698932d5e8362208690b3fc..b34b00f67ea67943dee7023ab9bfd19c1be5ccbe 100644
> --- a/gcc/tree-vect-stmts.c
> +++ b/gcc/tree-vect-stmts.c
> @@ -4545,6 +4545,72 @@ vect_create_vectorized_promotion_stmts (vec_info *vinfo,
>    *vec_oprnds0 = vec_tmp;
>  }
>
> +/* Create vectorized promotion stmts for widening stmts using only half the
> +   potential vector size for input.  */
> +static void
> +vect_create_vectorized_promotion_stmts (vec_info *vinfo,
> +                                        vec<tree> *vec_oprnds0,
> +                                        vec<tree> *vec_oprnds1,
> +                                        stmt_vec_info stmt_info, tree vec_dest,
> +                                        gimple_stmt_iterator *gsi,
> +                                        enum tree_code code1,
> +                                        int op_type)
> +{
> +  int i;
> +  tree vop0, vop1, new_tmp;
> +  gimple *new_stmt1;
> +  gimple *new_stmt2;
> +  gimple *new_stmt3;
> +  vec<tree> vec_tmp = vNULL;
> +
> +  vec_tmp.create (vec_oprnds0->length () * 2);

It looks like the * 2 isn't needed.

> +  FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
> +    {
> +      tree new_tmp1, new_tmp2, new_tmp3, out_type;
> +
> +      gcc_assert (op_type == binary_op);
> +      vop1 = (*vec_oprnds1)[i];
> +
> +      /* Widen the first vector input.  */
> +      out_type = TREE_TYPE (vec_dest);
> +      new_tmp1 = make_ssa_name (out_type);
> +      new_stmt1 = gimple_build_assign (new_tmp1, NOP_EXPR, vop0);
> +      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt1, gsi);
> +      if (VECTOR_TYPE_P (TREE_TYPE (vop1)))
> +        {
> +          /* Widen the second vector input.  */
> +          new_tmp2 = make_ssa_name (out_type);
> +          new_stmt2 = gimple_build_assign (new_tmp2, NOP_EXPR, vop1);
> +          vect_finish_stmt_generation (vinfo, stmt_info, new_stmt2, gsi);
> +          /* Perform the operation with both vector inputs widened.  */
> +          new_stmt3 = gimple_build_assign (vec_dest, code1, new_tmp1, new_tmp2);
> +        }
> +      else
> +        {
> +          /* Perform the operation with the single vector input widened.  */
> +          new_stmt3 = gimple_build_assign (vec_dest, code1, new_tmp1, vop1);
> +        }
> +
> +      new_tmp3 = make_ssa_name (vec_dest, new_stmt3);
> +      gimple_assign_set_lhs (new_stmt3, new_tmp3);
> +      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt3, gsi);
> +      if (is_gimple_call (new_stmt3))
> +        {
> +          new_tmp = gimple_call_lhs (new_stmt3);
> +        }
> +      else
> +        {
> +          new_tmp = gimple_assign_lhs (new_stmt3);
> +        }

The lhs is always new_tmp3, so it's not necessary to read it back.

> +
> +      /* Store the results for the next step.  */
> +      vec_tmp.quick_push (new_tmp);

FWIW, you could just assign to vec_oprnds[i] and not have vec_tmp,
but I don't know whether that's more or less confusing.  Either way's
fine with me.

> +    }
> +
> +  vec_oprnds0->release ();
> +  *vec_oprnds0 = vec_tmp;
> +}
>
>  /* Check if STMT_INFO performs a conversion operation that can be vectorized.
>     If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
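In source terms, the new function simply converts each input to the
wide type and then performs the normal (non-widening) operation.
Written with GNU C vector extensions for illustration (not part of the
patch):

  typedef unsigned char v8qi __attribute__ ((vector_size (8)));
  typedef unsigned short v8hi __attribute__ ((vector_size (16)));

  v8hi
  widen_sub (v8qi a, v8qi b)
  {
    v8hi wa = __builtin_convertvector (a, v8hi); /* NOP_EXPR: widen input 0.  */
    v8hi wb = __builtin_convertvector (b, v8hi); /* NOP_EXPR: widen input 1.  */
    return wa - wb;                              /* code1 (here MINUS_EXPR).  */
  }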
> @@ -4697,7 +4763,13 @@ vectorizable_conversion (vec_info *vinfo,
>    nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
>    nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
>    if (known_eq (nunits_out, nunits_in))
> -    modifier = NONE;
> +    if (code == WIDEN_MINUS_EXPR
> +        || code == WIDEN_PLUS_EXPR
> +        || code == WIDEN_LSHIFT_EXPR
> +        || code == WIDEN_MULT_EXPR)
> +        modifier = WIDEN;

Formatting nit: the last line should be indented by 6 spaces rather
than 8.

> +    else
> +      modifier = NONE;

> @@ -4743,9 +4815,21 @@ vectorizable_conversion (vec_info *vinfo,
>        return false;
>
>      case WIDEN:
> -      if (supportable_widening_operation (vinfo, code, stmt_info, vectype_out,
> -                                          vectype_in, &code1, &code2,
> -                                          &multi_step_cvt, &interm_types))
> +      if (known_eq (nunits_out, nunits_in)
> +          && (code == WIDEN_MINUS_EXPR
> +              || code == WIDEN_LSHIFT_EXPR
> +              || code == WIDEN_PLUS_EXPR
> +              || code == WIDEN_MULT_EXPR)
> +          && supportable_convert_operation (code, vectype_out, vectype_in,
> +                                            &code1))

Guess this is personal taste, sorry, since it's clearly right both ways,
but IMO it'd be better to drop the code test.  We can only get here with
nunits_out==nunits_in if we're converting a widening operation into a
non-widening operation.  If we do end up calling a separate function
(as per the comment above), then it would abort in a meaningful place
if something unexpected slips through.

> +        {
> +          gcc_assert (!(multi_step_cvt && op_type == binary_op));
> +          break;
> +        }
> +      else if (supportable_widening_operation (vinfo, code, stmt_info,
> +                                               vectype_out, vectype_in, &code1,
> +                                               &code2, &multi_step_cvt,
> +                                               &interm_types))
>        {
>          /* Binary widening operation can only be supported directly by the
>             architecture.  */

> @@ -4981,10 +5065,20 @@ vectorizable_conversion (vec_info *vinfo,
>            c1 = codecvt1;
>            c2 = codecvt2;
>          }
> -      vect_create_vectorized_promotion_stmts (vinfo, &vec_oprnds0,
> -                                              &vec_oprnds1, stmt_info,
> -                                              this_dest, gsi,
> -                                              c1, c2, op_type);
> +      if ((code == WIDEN_MINUS_EXPR
> +           || code == WIDEN_PLUS_EXPR
> +           || code == WIDEN_LSHIFT_EXPR
> +           || code == WIDEN_MULT_EXPR)
> +          && known_eq (nunits_in, nunits_out))

Same comment here about dropping the code tests.

Thanks,
Richard

> +        vect_create_vectorized_promotion_stmts (vinfo, &vec_oprnds0,
> +                                                &vec_oprnds1, stmt_info,
> +                                                this_dest, gsi,
> +                                                c1, op_type);
> +      else
> +        vect_create_vectorized_promotion_stmts (vinfo, &vec_oprnds0,
> +                                                &vec_oprnds1, stmt_info,
> +                                                this_dest, gsi,
> +                                                c1, c2, op_type);
>      }
>    FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
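Taking the two suggestions above together, the call site in the last
hunk might simplify to something like this (a sketch that assumes the
separate-helper route is taken, so that only the listed WIDEN_* codes
can reach this point with equal element counts):

  if (known_eq (nunits_out, nunits_in))
    vect_create_vectorized_promotion_stmts (vinfo, &vec_oprnds0,
                                            &vec_oprnds1, stmt_info,
                                            this_dest, gsi,
                                            c1, op_type);
  else
    vect_create_vectorized_promotion_stmts (vinfo, &vec_oprnds0,
                                            &vec_oprnds1, stmt_info,
                                            this_dest, gsi,
                                            c1, c2, op_type);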