On Thu, 7 Jan 2021, Richard Sandiford wrote:

> Richard Biener <rguent...@suse.de> writes:
> > On Wed, 6 Jan 2021, Richard Sandiford wrote:
> >
> >> PR98560 is about a case in which the vectoriser initially generates:
> >> 
> >>   mask_1 = a < 0;
> >>   mask_2 = mask_1 & ...;
> >>   res = VEC_COND_EXPR <mask_2, b, c>;
> >> 
> >> The vectoriser thus expects res to be calculated using vcond_mask.
> >> However, we later manage to fold mask_2 to mask_1, leaving:
> >> 
> >>   mask_1 = a < 0;
> >>   res = VEC_COND_EXPR <mask_1, b, c>;
> >> 
> >> gimple-isel then required a combined vcond to exist.
> >> 
> >> On most targets, it's not too onerous to provide all possible
> >> (compare x select) combinations.  For each data mode, you just
> >> need to provide unsigned comparisons, signed comparisons, and
> >> floating-point comparisons, with the data mode and type of
> >> comparison uniquely determining the mode of the compared values.
> >> But for targets like SVE that support “unpacked” vectors,
> >> it's not that simple: the level of unpacking adds another
> >> degree of freedom.
> >> 
> >> Rather than insist that the combined versions exist, I think
> >> we should be prepared to fall back to using separate comparisons
> >> and vcond_masks.  I think that makes more sense on targets like
> >> AArch64 and AArch32 in which compares and selects are fundamentally
> >> separate operations anyway.
> >
> > Indeed the mask variants (thus being able to expand the comparison)
> > are more fundamental.  I guess you're running into this path because
> > we did not consider using vcond_mask because of
> >
> >           if (used_vec_cond_exprs >= 2
> >               && (get_vcond_mask_icode (mode, TYPE_MODE (op0_type))
> >                   != CODE_FOR_nothing)
> >               && expand_vec_cmp_expr_p (op0a_type, op0_type, tcode))
> >             {
> >               /* Keep the SSA name and use vcond_mask.  */
> >               tcode = TREE_CODE (op0);
> >             }
> >
> > not triggering?  Which also means your patch fails to check/assert
> > that we can expand_vec_cmp_expr_p the separate compare?
> >
> >> Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK to install?
> >
> > It does feel like the function could need some refactoring ...
> >
> > But OK - preferably with the assertion that we can actually
> > expand the compare (I suggest to do the expand_vec_cmp_expr_p
> > above unconditionally and have a 'global' cannot_expand_mask
> > flag defaulted to false and checked in the new path).
> 
> OK, how does this look?  It's not quite what you said because
> I wanted to avoid the double negation in !cannot.

LGTM.

Thanks,
Richard.

> Thanks,
> Richard
> 
> 
> gcc/
>       PR tree-optimization/98560
>       * gimple-isel.cc (gimple_expand_vec_cond_expr): If we fail to use
>       IFN_VCOND{,U,EQ}, fall back on IFN_VCOND_MASK.
> 
> gcc/testsuite/
>       PR tree-optimization/98560
>       * gcc.dg/vect/pr98560-1.c: New test.
> ---
>  gcc/gimple-isel.cc                    | 26 +++++++++++++++++++-------
>  gcc/testsuite/gcc.dg/vect/pr98560-1.c | 17 +++++++++++++++++
>  2 files changed, 36 insertions(+), 7 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr98560-1.c
> 
> diff --git a/gcc/gimple-isel.cc b/gcc/gimple-isel.cc
> index d40338ce4a2..0f3d6bba229 100644
> --- a/gcc/gimple-isel.cc
> +++ b/gcc/gimple-isel.cc
> @@ -154,6 +154,7 @@ gimple_expand_vec_cond_expr (gimple_stmt_iterator *gsi,
>        return gimple_build_assign (lhs, tem3);
>      }
>  
> +  bool can_compute_op0 = true;
>    gcc_assert (!COMPARISON_CLASS_P (op0));
>    if (TREE_CODE (op0) == SSA_NAME)
>      {
> @@ -184,13 +185,16 @@ gimple_expand_vec_cond_expr (gimple_stmt_iterator *gsi,
>  
>         tree op0_type = TREE_TYPE (op0);
>         tree op0a_type = TREE_TYPE (op0a);
> +       if (TREE_CODE_CLASS (tcode) == tcc_comparison)
> +         can_compute_op0 = expand_vec_cmp_expr_p (op0a_type, op0_type,
> +                                                  tcode);
>  
>         /* Try to fold x CMP y ? -1 : 0 to x CMP y.  */
>  
> -       if (integer_minus_onep (op1)
> +       if (can_compute_op0
> +           && integer_minus_onep (op1)
>             && integer_zerop (op2)
> -           && TYPE_MODE (TREE_TYPE (lhs)) == TYPE_MODE (TREE_TYPE (op0))
> -           && expand_vec_cmp_expr_p (op0a_type, op0_type, tcode))
> +           && TYPE_MODE (TREE_TYPE (lhs)) == TYPE_MODE (TREE_TYPE (op0)))
>           {
>             tree conv_op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (lhs), op0);
>             gassign *new_stmt = gimple_build_assign (lhs, conv_op);
> @@ -198,10 +202,10 @@ gimple_expand_vec_cond_expr (gimple_stmt_iterator *gsi,
>             return new_stmt;
>           }
>  
> -       if (used_vec_cond_exprs >= 2
> +       if (can_compute_op0
> +           && used_vec_cond_exprs >= 2
>             && (get_vcond_mask_icode (mode, TYPE_MODE (op0_type))
> -               != CODE_FOR_nothing)
> -           && expand_vec_cmp_expr_p (op0a_type, op0_type, tcode))
> +               != CODE_FOR_nothing))
>           {
>             /* Keep the SSA name and use vcond_mask.  */
>             tcode = TREE_CODE (op0);
> @@ -254,7 +258,15 @@ gimple_expand_vec_cond_expr (gimple_stmt_iterator *gsi,
>       }
>      }
>  
> -  gcc_assert (icode != CODE_FOR_nothing);
> +  if (icode == CODE_FOR_nothing)
> +    {
> +      gcc_assert (VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (op0))
> +               && can_compute_op0
> +               && (get_vcond_mask_icode (mode, TYPE_MODE (TREE_TYPE (op0)))
> +                   != CODE_FOR_nothing));
> +      return gimple_build_call_internal (IFN_VCOND_MASK, 3, op0, op1, op2);
> +    }
> +
>    tree tcode_tree = build_int_cst (integer_type_node, tcode);
>    return gimple_build_call_internal (unsignedp ? IFN_VCONDU : IFN_VCOND,
>                                    5, op0a, op0b, op1, op2, tcode_tree);
> diff --git a/gcc/testsuite/gcc.dg/vect/pr98560-1.c b/gcc/testsuite/gcc.dg/vect/pr98560-1.c
> new file mode 100644
> index 00000000000..2583fc48f8a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/pr98560-1.c
> @@ -0,0 +1,17 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-O3 -fno-tree-vrp -fno-tree-fre -fno-tree-pre -fno-code-hoisting -fvect-cost-model=dynamic" } */
> +/* { dg-additional-options "-msve-vector-bits=128" { target aarch64_sve } } */
> +
> +#include <stdint.h>
> +
> +void
> +f (uint16_t *restrict dst, uint32_t *restrict src1, float *restrict src2)
> +{
> +  int i = 0;
> +  for (int j = 0; j < 4; ++j)
> +    {
> +      uint16_t tmp = src1[i] >> 1;
> +      dst[i] = (uint16_t) (src2[i] < 0 && i < 4 ? tmp : 1);
> +      i += 1;
> +    }
> +}
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)

Reply via email to