On Wed, 13 Oct 2021 at 13:26, Richard Sandiford
<richard.sandif...@arm.com> wrote:
>
> Prathamesh Kulkarni <prathamesh.kulka...@linaro.org> writes:
> > On Mon, 11 Oct 2021 at 20:42, Richard Sandiford
> > <richard.sandif...@arm.com> wrote:
> >>
> >> Prathamesh Kulkarni <prathamesh.kulka...@linaro.org> writes:
> >> > On Fri, 8 Oct 2021 at 21:19, Richard Sandiford
> >> > <richard.sandif...@arm.com> wrote:
> >> >>
> >> >> Thanks for looking at this.
> >> >>
> >> >> Prathamesh Kulkarni <prathamesh.kulka...@linaro.org> writes:
> >> >> > Hi,
> >> >> > As mentioned in PR, for the following test-case:
> >> >> >
> >> >> > typedef unsigned char uint8_t;
> >> >> >
> >> >> > static inline uint8_t
> >> >> > x264_clip_uint8(uint8_t x)
> >> >> > {
> >> >> >   uint8_t t = -x;
> >> >> >   uint8_t t1 = x & ~63;
> >> >> >   return (t1 != 0) ? t : x;
> >> >> > }
> >> >> >
> >> >> > void
> >> >> > mc_weight(uint8_t *restrict dst, uint8_t *restrict src, int n)
> >> >> > {
> >> >> >   for (int x = 0; x < n*16; x++)
> >> >> >     dst[x] = x264_clip_uint8(src[x]);
> >> >> > }
> >> >> >
> >> >> > -O3 -mcpu=generic+sve generates following code for the inner loop:
> >> >> >
> >> >> > .L3:
> >> >> >         ld1b    z0.b, p0/z, [x1, x2]
> >> >> >         movprfx z2, z0
> >> >> >         and     z2.b, z2.b, #0xc0
> >> >> >         movprfx z1, z0
> >> >> >         neg     z1.b, p1/m, z0.b
> >> >> >         cmpeq   p2.b, p1/z, z2.b, #0
> >> >> >         sel     z0.b, p2, z0.b, z1.b
> >> >> >         st1b    z0.b, p0, [x0, x2]
> >> >> >         add     x2, x2, x4
> >> >> >         whilelo p0.b, w2, w3
> >> >> >         b.any   .L3
> >> >> >
> >> >> > The sel is redundant since we could conditionally negate z0 based on
> >> >> > the predicate
> >> >> > comparing z2 with 0.
> >> >> >
> >> >> > As suggested in the PR, the attached patch, introduces a new
> >> >> > conditional internal function .COND_NEG, and in gimple-isel replaces
> >> >> > the following sequence:
> >> >> >    op2 = -op1
> >> >> >    op0 = A cmp B
> >> >> >    lhs = op0 ? op1 : op2
> >> >> >
> >> >> > with:
> >> >> >    op0 = A inverted_cmp B
> >> >> >    lhs = .COND_NEG (op0, op1, op1).
> >> >> >
> >> >> > lhs = .COD_NEG (op0, op1, op1)
> >> >> > implies
> >> >> > lhs = neg (op1) if cond is true OR fall back to op1 if cond is false.
> >> >> >
> >> >> > With patch, it generates the following code-gen:
> >> >> > .L3:
> >> >> >         ld1b    z0.b, p0/z, [x1, x2]
> >> >> >         movprfx z1, z0
> >> >> >         and     z1.b, z1.b, #0xc0
> >> >> >         cmpne   p1.b, p2/z, z1.b, #0
> >> >> >         neg     z0.b, p1/m, z0.b
> >> >> >         st1b    z0.b, p0, [x0, x2]
> >> >> >         add     x2, x2, x4
> >> >> >         whilelo p0.b, w2, w3
> >> >> >         b.any   .L3
> >> >> >
> >> >> > While it seems to work for this test-case, I am not entirely sure if
> >> >> > the patch is correct. Does it look in the right direction ?
> >> >>
> >> >> For binary ops we use match.pd rather than isel:
> >> >>
> >> >> (for uncond_op (UNCOND_BINARY)
> >> >>      cond_op (COND_BINARY)
> >> >>  (simplify
> >> >>   (vec_cond @0 (view_convert? (uncond_op@4 @1 @2)) @3)
> >> >>   (with { tree op_type = TREE_TYPE (@4); }
> >> >>    (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), 
> >> >> op_type)
> >> >>         && is_truth_type_for (op_type, TREE_TYPE (@0)))
> >> >>     (view_convert (cond_op @0 @1 @2 (view_convert:op_type @3))))))
> >> >>  (simplify
> >> >>   (vec_cond @0 @1 (view_convert? (uncond_op@4 @2 @3)))
> >> >>   (with { tree op_type = TREE_TYPE (@4); }
> >> >>    (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), 
> >> >> op_type)
> >> >>         && is_truth_type_for (op_type, TREE_TYPE (@0)))
> >> >>     (view_convert (cond_op (bit_not @0) @2 @3 (view_convert:op_type 
> >> >> @1)))))))
> >> >>
> >> >> I think it'd be good to do the same here, using new (UN)COND_UNARY
> >> >> iterators.  (The iterators will only have one value to start with,
> >> >> but other unary ops could get the same treatment in future.)
> >> > Thanks for the suggestions.
> >> > The attached patch adds a pattern to match.pd to replace:
> >> > cond = a cmp b
> >> > r = cond ? x : -x
> >> > with:
> >> > cond = a inverted_cmp b
> >> > r = cond ? -x : x
> >> >
> >> > Code-gen with patch for inner loop:
> >> > .L3:
> >> >         ld1b    z0.b, p0/z, [x1, x2]
> >> >         movprfx z1, z0
> >> >         and     z1.b, z1.b, #0xc0
> >> >         cmpne   p1.b, p2/z, z1.b, #0
> >> >         neg     z0.b, p1/m, z0.b
> >> >         st1b    z0.b, p0, [x0, x2]
> >> >         add     x2, x2, x4
> >> >         whilelo p0.b, w2, w3
> >> >         b.any   .L3
> >> >
> >> > Does it look OK ?
> >> > I didn't add it under (UN)COND_UNARY since it inverts the comparison,
> >> > which we might not want to do for other unary ops ?
> >>
> >> I think we should follow the structure of the current binary and
> >> ternary patterns: cope with unary operations in either arm of the
> >> vec_cond and use bit_not for the case in which the unary operation
> >> is in the “false” arm of the vec_cond.
> >>
> >> The bit_not will be folded away if the comparison can be inverted,
> >> but it will be left in-place if the comparison can't be inverted
> >> (as for some FP comparisons).
> > Ah indeed, done in the attached patch.
> > Does it look OK ?
>
> Yeah, this is OK, thanks.
Hi,
Sorry for late reply. The patch regressed cond_unary_4.c,
For the following test:

void __attribute__((noipa))
test_int64_t_neg(int64_t *__restrict r, int64_t *__restrict a,
                 int64_t *__restrict pred, int n) {
  for (int i = 0; i < n; ++i)
    r[i] = pred[i] ? (-(a[i])) : 0;
}

code-gen difference before/after patch:
@@ -15,3 +15,2 @@
        whilelo p0.d, wzr, w3
-       mov     z2.b, #0
        .p2align 3,,7
@@ -21,3 +20,3 @@
        ld1d    z1.d, p1/z, [x1, x4, lsl 3]
-       movprfx z0, z2
+       movprfx z0.d, p1/z, z1.d
        neg     z0.d, p1/m, z1.d

and similarly for _neg tests of different type.
I guess using /z is an improvement ?

The attached patch adds view_convert to the pattern (needed for
int16_t) and adjusts cond_unary_4.c accordingly.
Bootstrapped + tested on aarch64-linux-gnu.
OK to commit ?

Thanks,
Prathamesh
>
> Richard
>
> >
> > Thanks,
> > Prathamesh
> >>
> >> Thanks,
> >> Richard
> >>
> >> >
> >> > Also, I am not sure, how to test if target supports conditional
> >> > internal function ?
> >> > I tried to use:
> >> > (for cmp (tcc_comparison)
> >> >      icmp (inverted_tcc_comparison)
> >> >  (simplify
> >> >   (vec_cond (cmp@2 @0 @1) @3 (negate @3))
> >> >    (with { auto op_type = TREE_TYPE (@2); }
> >> >     (if (vectorized_internal_fn_supported_p (IFN_COND_NEG, op_type)
> >> >          && is_truth_type_for (op_type, TREE_TYPE (@0)))
> >> >       (IFN_COND_NEG (icmp:op_type @0 @1) @3 @3)))))
> >> >
> >> > but both the conditions seem to fail.
> >> >
> >> > Thanks,
> >> > Prathamesh
> >> >
> >> >
> >> >>
> >> >> Richard
> >> >>
> >> >>
> >> >> >
> >> >> > Thanks,
> >> >> > Prathamesh
> >> >> >
> >> >> > diff --git a/gcc/gimple-isel.cc b/gcc/gimple-isel.cc
> >> >> > index 38e90933c3e..5b0dd3c1993 100644
> >> >> > --- a/gcc/gimple-isel.cc
> >> >> > +++ b/gcc/gimple-isel.cc
> >> >> > @@ -39,6 +39,8 @@ along with GCC; see the file COPYING3.  If not see
> >> >> >  #include "optabs.h"
> >> >> >  #include "gimple-fold.h"
> >> >> >  #include "internal-fn.h"
> >> >> > +#include "fold-const.h"
> >> >> > +#include "tree-pretty-print.h"
> >> >> >
> >> >> >  /* Expand all ARRAY_REF(VIEW_CONVERT_EXPR) gimple assignments into 
> >> >> > calls to
> >> >> >     internal function based on vector type of selected expansion.
> >> >> > @@ -203,6 +205,35 @@ gimple_expand_vec_cond_expr 
> >> >> > (gimple_stmt_iterator *gsi,
> >> >> >             return new_stmt;
> >> >> >           }
> >> >> >
> >> >> > +       /* Replace:
> >> >> > +          op2 = -op1
> >> >> > +          op0 = A cmp B
> >> >> > +          lhs = op0 ? op1 : op2
> >> >> > +
> >> >> > +          with:
> >> >> > +          op0 = A inverted_cmp B
> >> >> > +          lhs = .COND_NEG (op0, op1, op1).  */
> >> >> > +
> >> >> > +       gassign *op1_def = nullptr;
> >> >> > +       if (TREE_CODE (op1) == SSA_NAME)
> >> >> > +         op1_def = static_cast<gassign *> (SSA_NAME_DEF_STMT (op1));
> >> >> > +
> >> >> > +       gassign *op2_def = nullptr;
> >> >> > +       if (TREE_CODE (op2) == SSA_NAME)
> >> >> > +         op2_def = static_cast<gassign *> (SSA_NAME_DEF_STMT (op2));
> >> >> > +
> >> >> > +       if (can_compute_op0 && op1_def && op2_def
> >> >> > +           && gimple_assign_rhs_code (op2_def) == NEGATE_EXPR
> >> >> > +           && operand_equal_p (gimple_assign_rhs1 (op2_def), op1, 0))
> >> >> > +         {
> >> >> > +           auto inverted_code
> >> >> > +             = invert_tree_comparison (gimple_assign_rhs_code 
> >> >> > (def_stmt), true);
> >> >> > +           gimple_assign_set_rhs_code (def_stmt, inverted_code);
> >> >> > +           auto gsi2 = gsi_for_stmt (op2_def);
> >> >> > +           gsi_remove (&gsi2, true);
> >> >> > +           return gimple_build_call_internal (IFN_COND_NEG, 3, op0, 
> >> >> > op1, op1);
> >> >> > +         }
> >> >> > +
> >> >> >         if (can_compute_op0
> >> >> >             && used_vec_cond_exprs >= 2
> >> >> >             && (get_vcond_mask_icode (mode, TYPE_MODE (op0_type))
> >> >> > diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
> >> >> > index 78db25bbac4..b57c7a4ed3e 100644
> >> >> > --- a/gcc/internal-fn.c
> >> >> > +++ b/gcc/internal-fn.c
> >> >> > @@ -3877,7 +3877,8 @@ static void (*const internal_fn_expanders[]) 
> >> >> > (internal_fn, gcall *) = {
> >> >> >    T (BIT_IOR_EXPR, IFN_COND_IOR) \
> >> >> >    T (BIT_XOR_EXPR, IFN_COND_XOR) \
> >> >> >    T (LSHIFT_EXPR, IFN_COND_SHL) \
> >> >> > -  T (RSHIFT_EXPR, IFN_COND_SHR)
> >> >> > +  T (RSHIFT_EXPR, IFN_COND_SHR) \
> >> >> > +  T (NEGATE_EXPR, IFN_COND_NEG)
> >> >> >
> >> >> >  /* Return a function that only performs CODE when a certain 
> >> >> > condition is met
> >> >> >     and that uses a given fallback value otherwise.  For example, if 
> >> >> > CODE is
> >> >> > diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> >> >> > index 88169ef4656..db40d1bfd18 100644
> >> >> > --- a/gcc/internal-fn.def
> >> >> > +++ b/gcc/internal-fn.def
> >> >> > @@ -204,6 +204,8 @@ DEF_INTERNAL_OPTAB_FN (COND_FMS, ECF_CONST, 
> >> >> > cond_fms, cond_ternary)
> >> >> >  DEF_INTERNAL_OPTAB_FN (COND_FNMA, ECF_CONST, cond_fnma, cond_ternary)
> >> >> >  DEF_INTERNAL_OPTAB_FN (COND_FNMS, ECF_CONST, cond_fnms, cond_ternary)
> >> >> >
> >> >> > +DEF_INTERNAL_OPTAB_FN (COND_NEG, ECF_CONST | ECF_NOTHROW, cond_neg, 
> >> >> > cond_unary)
> >> >> > +
> >> >> >  DEF_INTERNAL_OPTAB_FN (RSQRT, ECF_CONST, rsqrt, unary)
> >> >> >
> >> >> >  DEF_INTERNAL_OPTAB_FN (REDUC_PLUS, ECF_CONST | ECF_NOTHROW,
> >> >> > diff --git a/gcc/optabs.def b/gcc/optabs.def
> >> >> > index 201b8aae1c0..fc65a3a7c23 100644
> >> >> > --- a/gcc/optabs.def
> >> >> > +++ b/gcc/optabs.def
> >> >> > @@ -245,6 +245,7 @@ OPTAB_D (cond_fma_optab, "cond_fma$a")
> >> >> >  OPTAB_D (cond_fms_optab, "cond_fms$a")
> >> >> >  OPTAB_D (cond_fnma_optab, "cond_fnma$a")
> >> >> >  OPTAB_D (cond_fnms_optab, "cond_fnms$a")
> >> >> > +OPTAB_D (cond_neg_optab, "cond_neg$a")
> >> >> >  OPTAB_D (cmov_optab, "cmov$a6")
> >> >> >  OPTAB_D (cstore_optab, "cstore$a4")
> >> >> >  OPTAB_D (ctrap_optab, "ctrap$a4")
> >> >
> >> > diff --git a/gcc/gimple-match-head.c b/gcc/gimple-match-head.c
> >> > index 7112c116835..90cf131af48 100644
> >> > --- a/gcc/gimple-match-head.c
> >> > +++ b/gcc/gimple-match-head.c
> >> > @@ -878,6 +878,10 @@ try_conditional_simplification (internal_fn ifn, 
> >> > gimple_match_op *res_op,
> >> >        if (!gimple_resimplify3 (seq, &cond_op, valueize))
> >> >       return false;
> >> >        break;
> >> > +    case 1:
> >> > +      if (!gimple_resimplify1 (seq, &cond_op, valueize))
> >> > +     return false;
> >> > +      break;
> >> >      default:
> >> >        gcc_unreachable ();
> >> >      }
> >> > diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
> >> > index 78db25bbac4..b57c7a4ed3e 100644
> >> > --- a/gcc/internal-fn.c
> >> > +++ b/gcc/internal-fn.c
> >> > @@ -3877,7 +3877,8 @@ static void (*const internal_fn_expanders[]) 
> >> > (internal_fn, gcall *) = {
> >> >    T (BIT_IOR_EXPR, IFN_COND_IOR) \
> >> >    T (BIT_XOR_EXPR, IFN_COND_XOR) \
> >> >    T (LSHIFT_EXPR, IFN_COND_SHL) \
> >> > -  T (RSHIFT_EXPR, IFN_COND_SHR)
> >> > +  T (RSHIFT_EXPR, IFN_COND_SHR) \
> >> > +  T (NEGATE_EXPR, IFN_COND_NEG)
> >> >
> >> >  /* Return a function that only performs CODE when a certain condition 
> >> > is met
> >> >     and that uses a given fallback value otherwise.  For example, if 
> >> > CODE is
> >> > diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> >> > index 88169ef4656..db40d1bfd18 100644
> >> > --- a/gcc/internal-fn.def
> >> > +++ b/gcc/internal-fn.def
> >> > @@ -204,6 +204,8 @@ DEF_INTERNAL_OPTAB_FN (COND_FMS, ECF_CONST, 
> >> > cond_fms, cond_ternary)
> >> >  DEF_INTERNAL_OPTAB_FN (COND_FNMA, ECF_CONST, cond_fnma, cond_ternary)
> >> >  DEF_INTERNAL_OPTAB_FN (COND_FNMS, ECF_CONST, cond_fnms, cond_ternary)
> >> >
> >> > +DEF_INTERNAL_OPTAB_FN (COND_NEG, ECF_CONST | ECF_NOTHROW, cond_neg, 
> >> > cond_unary)
> >> > +
> >> >  DEF_INTERNAL_OPTAB_FN (RSQRT, ECF_CONST, rsqrt, unary)
> >> >
> >> >  DEF_INTERNAL_OPTAB_FN (REDUC_PLUS, ECF_CONST | ECF_NOTHROW,
> >> > diff --git a/gcc/match.pd b/gcc/match.pd
> >> > index a9791ceb74a..fde6d32b2c4 100644
> >> > --- a/gcc/match.pd
> >> > +++ b/gcc/match.pd
> >> > @@ -7037,6 +7037,24 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> >> >                                                 wi::mask (tree_to_uhwi 
> >> > (@1),
> >> >                                                 false, prec)); })
> >> >                    { build_zero_cst (TREE_TYPE (@0)); }))))))))
> >> > +#if GIMPLE
> >> > +
> >> > +/* Simplify:
> >> > +
> >> > +     cond = a cmp b
> >> > +     r = cond ? x : -x
> >> > +
> >> > +    to:
> >> > +
> >> > +     cond = a inverted_cmp b
> >> > +     r = cond ? -x : x.  */
> >> > +
> >> > +(for cmp (tcc_comparison)
> >> > +     icmp (inverted_tcc_comparison)
> >> > + (simplify
> >> > +  (vec_cond (cmp@2 @0 @1) @3 (negate @3))
> >> > +   (with { auto op_type = TREE_TYPE (@2); }
> >> > +    (IFN_COND_NEG (icmp:op_type @0 @1) @3 @3))))
> >> >
> >> >  /* Simplify:
> >> >
> >> > @@ -7056,7 +7074,6 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> >> >     conditional internal functions must support the same comparisons
> >> >     inside and outside a VEC_COND_EXPR.  */
> >> >
> >> > -#if GIMPLE
> >> >  (for uncond_op (UNCOND_BINARY)
> >> >       cond_op (COND_BINARY)
> >> >   (simplify
> >> > diff --git a/gcc/optabs.def b/gcc/optabs.def
> >> > index 201b8aae1c0..fc65a3a7c23 100644
> >> > --- a/gcc/optabs.def
> >> > +++ b/gcc/optabs.def
> >> > @@ -245,6 +245,7 @@ OPTAB_D (cond_fma_optab, "cond_fma$a")
> >> >  OPTAB_D (cond_fms_optab, "cond_fms$a")
> >> >  OPTAB_D (cond_fnma_optab, "cond_fnma$a")
> >> >  OPTAB_D (cond_fnms_optab, "cond_fnms$a")
> >> > +OPTAB_D (cond_neg_optab, "cond_neg$a")
> >> >  OPTAB_D (cmov_optab, "cmov$a6")
> >> >  OPTAB_D (cstore_optab, "cstore$a4")
> >> >  OPTAB_D (ctrap_optab, "ctrap$a4")
> >
> > diff --git a/gcc/gimple-match-head.c b/gcc/gimple-match-head.c
> > index 7112c116835..9d88b2f8551 100644
> > --- a/gcc/gimple-match-head.c
> > +++ b/gcc/gimple-match-head.c
> > @@ -870,6 +870,10 @@ try_conditional_simplification (internal_fn ifn, 
> > gimple_match_op *res_op,
> >    memcpy (cond_op.ops, res_op->ops + 1, (num_ops - 1) * sizeof 
> > *cond_op.ops);
> >    switch (num_ops - 2)
> >      {
> > +    case 1:
> > +      if (!gimple_resimplify1 (seq, &cond_op, valueize))
> > +     return false;
> > +      break;
> >      case 2:
> >        if (!gimple_resimplify2 (seq, &cond_op, valueize))
> >       return false;
> > diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
> > index 78db25bbac4..b57c7a4ed3e 100644
> > --- a/gcc/internal-fn.c
> > +++ b/gcc/internal-fn.c
> > @@ -3877,7 +3877,8 @@ static void (*const internal_fn_expanders[]) 
> > (internal_fn, gcall *) = {
> >    T (BIT_IOR_EXPR, IFN_COND_IOR) \
> >    T (BIT_XOR_EXPR, IFN_COND_XOR) \
> >    T (LSHIFT_EXPR, IFN_COND_SHL) \
> > -  T (RSHIFT_EXPR, IFN_COND_SHR)
> > +  T (RSHIFT_EXPR, IFN_COND_SHR) \
> > +  T (NEGATE_EXPR, IFN_COND_NEG)
> >
> >  /* Return a function that only performs CODE when a certain condition is 
> > met
> >     and that uses a given fallback value otherwise.  For example, if CODE is
> > diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> > index 88169ef4656..798a50ba332 100644
> > --- a/gcc/internal-fn.def
> > +++ b/gcc/internal-fn.def
> > @@ -204,6 +204,8 @@ DEF_INTERNAL_OPTAB_FN (COND_FMS, ECF_CONST, cond_fms, 
> > cond_ternary)
> >  DEF_INTERNAL_OPTAB_FN (COND_FNMA, ECF_CONST, cond_fnma, cond_ternary)
> >  DEF_INTERNAL_OPTAB_FN (COND_FNMS, ECF_CONST, cond_fnms, cond_ternary)
> >
> > +DEF_INTERNAL_OPTAB_FN (COND_NEG, ECF_CONST, cond_neg, cond_unary)
> > +
> >  DEF_INTERNAL_OPTAB_FN (RSQRT, ECF_CONST, rsqrt, unary)
> >
> >  DEF_INTERNAL_OPTAB_FN (REDUC_PLUS, ECF_CONST | ECF_NOTHROW,
> > diff --git a/gcc/match.pd b/gcc/match.pd
> > index a9791ceb74a..1a4c157bd31 100644
> > --- a/gcc/match.pd
> > +++ b/gcc/match.pd
> > @@ -78,6 +78,12 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (CEIL)
> >  DEFINE_INT_AND_FLOAT_ROUND_FN (ROUND)
> >  DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> >
> > +/* Unary operations and their associated IFN_COND_* function.  */
> > +(define_operator_list UNCOND_UNARY
> > +  negate)
> > +(define_operator_list COND_UNARY
> > +  IFN_COND_NEG)
> > +
> >  /* Binary operations and their associated IFN_COND_* function.  */
> >  (define_operator_list UNCOND_BINARY
> >    plus minus
> > @@ -7038,6 +7044,32 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> >                                                 false, prec)); })
> >                    { build_zero_cst (TREE_TYPE (@0)); }))))))))
> >
> > +#if GIMPLE
> > +
> > +/* Simplify:
> > +     a = op a1
> > +     r = cond ? a : b
> > +     --> r = .COND_FN (cond, a, b)
> > +and,
> > +    a = op a1
> > +    r = cond ? b : a
> > +    --> r = .COND_FN (~cond, b, a).  */
> > +
> > +(for uncond_op (UNCOND_UNARY)
> > +     cond_op (COND_UNARY)
> > + (simplify
> > +  (vec_cond @0 (uncond_op@3 @1) @2)
> > +   (with { tree op_type = TREE_TYPE (@3); }
> > +    (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), 
> > op_type)
> > +        && is_truth_type_for (op_type, TREE_TYPE (@0)))
> > +     (cond_op @0 @1 @2))))
> > + (simplify
> > +  (vec_cond @0 @1 (uncond_op@3 @2))
> > +   (with { tree op_type = TREE_TYPE (@3); }
> > +    (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), 
> > op_type)
> > +        && is_truth_type_for (op_type, TREE_TYPE (@0)))
> > +     (cond_op (bit_not @0) @2 @1)))))
> > +
> >  /* Simplify:
> >
> >       a = a1 op a2
> > @@ -7056,7 +7088,6 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> >     conditional internal functions must support the same comparisons
> >     inside and outside a VEC_COND_EXPR.  */
> >
> > -#if GIMPLE
> >  (for uncond_op (UNCOND_BINARY)
> >       cond_op (COND_BINARY)
> >   (simplify
> > diff --git a/gcc/optabs.def b/gcc/optabs.def
> > index 201b8aae1c0..fc65a3a7c23 100644
> > --- a/gcc/optabs.def
> > +++ b/gcc/optabs.def
> > @@ -245,6 +245,7 @@ OPTAB_D (cond_fma_optab, "cond_fma$a")
> >  OPTAB_D (cond_fms_optab, "cond_fms$a")
> >  OPTAB_D (cond_fnma_optab, "cond_fnma$a")
> >  OPTAB_D (cond_fnms_optab, "cond_fnms$a")
> > +OPTAB_D (cond_neg_optab, "cond_neg$a")
> >  OPTAB_D (cmov_optab, "cmov$a6")
> >  OPTAB_D (cstore_optab, "cstore$a4")
> >  OPTAB_D (ctrap_optab, "ctrap$a4")
> > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr93183.c 
> > b/gcc/testsuite/gcc.target/aarch64/sve/pr93183.c
> > new file mode 100644
> > index 00000000000..2f92224cecb
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/aarch64/sve/pr93183.c
> > @@ -0,0 +1,21 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O3 -mcpu=generic+sve" } */
> > +
> > +typedef unsigned char uint8_t;
> > +
> > +static inline uint8_t
> > +x264_clip_uint8(uint8_t x)
> > +{
> > +  uint8_t t = -x;
> > +  uint8_t t1 = x & ~63;
> > +  return (t1 != 0) ? t : x;
> > +}
> > +
> > +void
> > +mc_weight(uint8_t *restrict dst, uint8_t *restrict src, int n)
> > +{
> > +  for (int x = 0; x < n*16; x++)
> > +    dst[x] = x264_clip_uint8(src[x]);
> > +}
> > +
> > +/* { dg-final { scan-assembler-not {\tsel} } } */
gcc/ChangeLog:
        PR target/93183
        * gimple-match-head.c (try_conditional_simplification): Add case for 
single operand.
        * internal-fn.def: Add entry for COND_NEG internal function.
        * internal-fn.c (FOR_EACH_CODE_MAPPING): Add entry for
        NEGATE_EXPR, COND_NEG mapping.
        * optabs.def: Add entry for cond_neg_optab.
        * match.pd (UNCOND_UNARY, COND_UNARY): New operator lists.
        (vec_cond COND (foo A) B) -> (IFN_COND_FOO COND A B): New pattern.
        (vec_cond COND B (foo A)) -> (IFN_COND_FOO ~COND A B): Likewise.

gcc/testsuite/ChangeLog:
        PR target/93183
        * gcc.target/aarch64/sve/cond_unary_4.c: Adjust.
        * gcc.target/aarch64/sve/pr93183.c: New test.

diff --git a/gcc/gimple-match-head.c b/gcc/gimple-match-head.c
index 7112c116835..9d88b2f8551 100644
--- a/gcc/gimple-match-head.c
+++ b/gcc/gimple-match-head.c
@@ -870,6 +870,10 @@ try_conditional_simplification (internal_fn ifn, 
gimple_match_op *res_op,
   memcpy (cond_op.ops, res_op->ops + 1, (num_ops - 1) * sizeof *cond_op.ops);
   switch (num_ops - 2)
     {
+    case 1:
+      if (!gimple_resimplify1 (seq, &cond_op, valueize))
+       return false;
+      break;
     case 2:
       if (!gimple_resimplify2 (seq, &cond_op, valueize))
        return false;
diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
index 78db25bbac4..b57c7a4ed3e 100644
--- a/gcc/internal-fn.c
+++ b/gcc/internal-fn.c
@@ -3877,7 +3877,8 @@ static void (*const internal_fn_expanders[]) 
(internal_fn, gcall *) = {
   T (BIT_IOR_EXPR, IFN_COND_IOR) \
   T (BIT_XOR_EXPR, IFN_COND_XOR) \
   T (LSHIFT_EXPR, IFN_COND_SHL) \
-  T (RSHIFT_EXPR, IFN_COND_SHR)
+  T (RSHIFT_EXPR, IFN_COND_SHR) \
+  T (NEGATE_EXPR, IFN_COND_NEG)
 
 /* Return a function that only performs CODE when a certain condition is met
    and that uses a given fallback value otherwise.  For example, if CODE is
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 88169ef4656..798a50ba332 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -204,6 +204,8 @@ DEF_INTERNAL_OPTAB_FN (COND_FMS, ECF_CONST, cond_fms, 
cond_ternary)
 DEF_INTERNAL_OPTAB_FN (COND_FNMA, ECF_CONST, cond_fnma, cond_ternary)
 DEF_INTERNAL_OPTAB_FN (COND_FNMS, ECF_CONST, cond_fnms, cond_ternary)
 
+DEF_INTERNAL_OPTAB_FN (COND_NEG, ECF_CONST, cond_neg, cond_unary)
+
 DEF_INTERNAL_OPTAB_FN (RSQRT, ECF_CONST, rsqrt, unary)
 
 DEF_INTERNAL_OPTAB_FN (REDUC_PLUS, ECF_CONST | ECF_NOTHROW,
diff --git a/gcc/match.pd b/gcc/match.pd
index a9791ceb74a..f5a741705a6 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -78,6 +78,12 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (CEIL)
 DEFINE_INT_AND_FLOAT_ROUND_FN (ROUND)
 DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 
+/* Unary operations and their associated IFN_COND_* function.  */
+(define_operator_list UNCOND_UNARY
+  negate)
+(define_operator_list COND_UNARY
+  IFN_COND_NEG)
+
 /* Binary operations and their associated IFN_COND_* function.  */
 (define_operator_list UNCOND_BINARY
   plus minus
@@ -7038,6 +7044,32 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
                                                  false, prec)); })
                     { build_zero_cst (TREE_TYPE (@0)); }))))))))
 
+#if GIMPLE
+
+/* Simplify:
+     a = op a1
+     r = cond ? a : b
+     --> r = .COND_FN (cond, a, b)
+and,
+    a = op a1
+    r = cond ? b : a
+    --> r = .COND_FN (~cond, b, a).  */
+
+(for uncond_op (UNCOND_UNARY)
+     cond_op (COND_UNARY)
+ (simplify
+  (vec_cond @0 (view_convert? (uncond_op@3 @1)) @2)
+   (with { tree op_type = TREE_TYPE (@3); }
+    (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
+        && is_truth_type_for (op_type, TREE_TYPE (@0)))
+     (cond_op @0 @1 @2))))
+ (simplify
+  (vec_cond @0 @1 (view_convert? (uncond_op@3 @2)))
+   (with { tree op_type = TREE_TYPE (@3); }
+    (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
+        && is_truth_type_for (op_type, TREE_TYPE (@0)))
+     (cond_op (bit_not @0) @2 @1)))))
+
 /* Simplify:
 
      a = a1 op a2
@@ -7056,7 +7088,6 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
    conditional internal functions must support the same comparisons
    inside and outside a VEC_COND_EXPR.  */
 
-#if GIMPLE
 (for uncond_op (UNCOND_BINARY)
      cond_op (COND_BINARY)
  (simplify
diff --git a/gcc/optabs.def b/gcc/optabs.def
index 201b8aae1c0..fc65a3a7c23 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -245,6 +245,7 @@ OPTAB_D (cond_fma_optab, "cond_fma$a")
 OPTAB_D (cond_fms_optab, "cond_fms$a")
 OPTAB_D (cond_fnma_optab, "cond_fnma$a")
 OPTAB_D (cond_fnms_optab, "cond_fnms$a")
+OPTAB_D (cond_neg_optab, "cond_neg$a")
 OPTAB_D (cmov_optab, "cmov$a6")
 OPTAB_D (cstore_optab, "cstore$a4")
 OPTAB_D (ctrap_optab, "ctrap$a4")
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_4.c 
b/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_4.c
index 4604365fbef..cedc5b7c549 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_4.c
@@ -56,7 +56,11 @@ TEST_ALL (DEF_LOOP)
    we're relying on combine to merge a SEL and an arithmetic operation,
    and the SEL doesn't allow the "false" value to be zero when the "true"
    value is a register.  */
-/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+, z[0-9]+\n} 14 } } */
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+, z[0-9]+\n} 7 } } */
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.b, p[0-9]/z, 
z[0-9]+\.b} 1 } } */
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-9]/z, 
z[0-9]+\.h} 2 } } */
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-9]/z, 
z[0-9]+\.s} 2 } } */
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.d, p[0-9]/z, 
z[0-9]+\.d} 2 } } */
 
 /* { dg-final { scan-assembler-not {\tmov\tz[^\n]*z} } } */
 /* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr93183.c 
b/gcc/testsuite/gcc.target/aarch64/sve/pr93183.c
new file mode 100644
index 00000000000..2f92224cecb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr93183.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mcpu=generic+sve" } */
+
+typedef unsigned char uint8_t;
+
+static inline uint8_t
+x264_clip_uint8(uint8_t x)
+{
+  uint8_t t = -x;
+  uint8_t t1 = x & ~63; 
+  return (t1 != 0) ? t : x; 
+}
+
+void
+mc_weight(uint8_t *restrict dst, uint8_t *restrict src, int n)
+{
+  for (int x = 0; x < n*16; x++)
+    dst[x] = x264_clip_uint8(src[x]);
+}
+
+/* { dg-final { scan-assembler-not {\tsel} } } */

Reply via email to