Thanks for looking at this.

Prathamesh Kulkarni <prathamesh.kulka...@linaro.org> writes:
> Hi,
> As mentioned in PR, for the following test-case:
>
> typedef unsigned char uint8_t;
>
> static inline uint8_t
> x264_clip_uint8(uint8_t x)
> {
>   uint8_t t = -x;
>   uint8_t t1 = x & ~63;
>   return (t1 != 0) ? t : x;
> }
>
> void
> mc_weight(uint8_t *restrict dst, uint8_t *restrict src, int n)
> {
>   for (int x = 0; x < n*16; x++)
>     dst[x] = x264_clip_uint8(src[x]);
> }
>
> -O3 -mcpu=generic+sve generates following code for the inner loop:
>
> .L3:
>         ld1b    z0.b, p0/z, [x1, x2]
>         movprfx z2, z0
>         and     z2.b, z2.b, #0xc0
>         movprfx z1, z0
>         neg     z1.b, p1/m, z0.b
>         cmpeq   p2.b, p1/z, z2.b, #0
>         sel     z0.b, p2, z0.b, z1.b
>         st1b    z0.b, p0, [x0, x2]
>         add     x2, x2, x4
>         whilelo p0.b, w2, w3
>         b.any   .L3
>
> The sel is redundant since we could conditionally negate z0 based on
> the predicate
> comparing z2 with 0.
>
> As suggested in the PR, the attached patch, introduces a new
> conditional internal function .COND_NEG, and in gimple-isel replaces
> the following sequence:
>    op2 = -op1
>    op0 = A cmp B
>    lhs = op0 ? op1 : op2
>
> with:
>    op0 = A inverted_cmp B
>    lhs = .COND_NEG (op0, op1, op1).
>
> lhs = .COD_NEG (op0, op1, op1)
> implies
> lhs = neg (op1) if cond is true OR fall back to op1 if cond is false.
>
> With patch, it generates the following code-gen:
> .L3:
>         ld1b    z0.b, p0/z, [x1, x2]
>         movprfx z1, z0
>         and     z1.b, z1.b, #0xc0
>         cmpne   p1.b, p2/z, z1.b, #0
>         neg     z0.b, p1/m, z0.b
>         st1b    z0.b, p0, [x0, x2]
>         add     x2, x2, x4
>         whilelo p0.b, w2, w3
>         b.any   .L3
>
> While it seems to work for this test-case, I am not entirely sure if
> the patch is correct. Does it look in the right direction ?

For binary ops we use match.pd rather than isel:

(for uncond_op (UNCOND_BINARY)
     cond_op (COND_BINARY)
 (simplify
  (vec_cond @0 (view_convert? (uncond_op@4 @1 @2)) @3)
  (with { tree op_type = TREE_TYPE (@4); }
   (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
        && is_truth_type_for (op_type, TREE_TYPE (@0)))
    (view_convert (cond_op @0 @1 @2 (view_convert:op_type @3))))))
 (simplify
  (vec_cond @0 @1 (view_convert? (uncond_op@4 @2 @3)))
  (with { tree op_type = TREE_TYPE (@4); }
   (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
        && is_truth_type_for (op_type, TREE_TYPE (@0)))
    (view_convert (cond_op (bit_not @0) @2 @3 (view_convert:op_type @1)))))))

I think it'd be good to do the same here, using new (UN)COND_UNARY
iterators.  (The iterators will only have one value to start with,
but other unary ops could get the same treatment in future.)

Richard


>
> Thanks,
> Prathamesh
>
> diff --git a/gcc/gimple-isel.cc b/gcc/gimple-isel.cc
> index 38e90933c3e..5b0dd3c1993 100644
> --- a/gcc/gimple-isel.cc
> +++ b/gcc/gimple-isel.cc
> @@ -39,6 +39,8 @@ along with GCC; see the file COPYING3.  If not see
>  #include "optabs.h"
>  #include "gimple-fold.h"
>  #include "internal-fn.h"
> +#include "fold-const.h"
> +#include "tree-pretty-print.h"
>  
>  /* Expand all ARRAY_REF(VIEW_CONVERT_EXPR) gimple assignments into calls to
>     internal function based on vector type of selected expansion.
> @@ -203,6 +205,35 @@ gimple_expand_vec_cond_expr (gimple_stmt_iterator *gsi,
>             return new_stmt;
>           }
>  
> +       /* Replace:
> +          op2 = -op1
> +          op0 = A cmp B
> +          lhs = op0 ? op1 : op2
> +
> +          with:
> +          op0 = A inverted_cmp B
> +          lhs = .COND_NEG (op0, op1, op1).  */
> +
> +       gassign *op1_def = nullptr;
> +       if (TREE_CODE (op1) == SSA_NAME)
> +         op1_def = static_cast<gassign *> (SSA_NAME_DEF_STMT (op1));
> +
> +       gassign *op2_def = nullptr;
> +       if (TREE_CODE (op2) == SSA_NAME)
> +         op2_def = static_cast<gassign *> (SSA_NAME_DEF_STMT (op2));
> +
> +       if (can_compute_op0 && op1_def && op2_def
> +           && gimple_assign_rhs_code (op2_def) == NEGATE_EXPR
> +           && operand_equal_p (gimple_assign_rhs1 (op2_def), op1, 0))
> +         {
> +           auto inverted_code
> +             = invert_tree_comparison (gimple_assign_rhs_code (def_stmt), 
> true);
> +           gimple_assign_set_rhs_code (def_stmt, inverted_code);
> +           auto gsi2 = gsi_for_stmt (op2_def);
> +           gsi_remove (&gsi2, true);
> +           return gimple_build_call_internal (IFN_COND_NEG, 3, op0, op1, 
> op1);
> +         }
> +
>         if (can_compute_op0
>             && used_vec_cond_exprs >= 2
>             && (get_vcond_mask_icode (mode, TYPE_MODE (op0_type))
> diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
> index 78db25bbac4..b57c7a4ed3e 100644
> --- a/gcc/internal-fn.c
> +++ b/gcc/internal-fn.c
> @@ -3877,7 +3877,8 @@ static void (*const internal_fn_expanders[]) 
> (internal_fn, gcall *) = {
>    T (BIT_IOR_EXPR, IFN_COND_IOR) \
>    T (BIT_XOR_EXPR, IFN_COND_XOR) \
>    T (LSHIFT_EXPR, IFN_COND_SHL) \
> -  T (RSHIFT_EXPR, IFN_COND_SHR)
> +  T (RSHIFT_EXPR, IFN_COND_SHR) \
> +  T (NEGATE_EXPR, IFN_COND_NEG)
>  
>  /* Return a function that only performs CODE when a certain condition is met
>     and that uses a given fallback value otherwise.  For example, if CODE is
> diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> index 88169ef4656..db40d1bfd18 100644
> --- a/gcc/internal-fn.def
> +++ b/gcc/internal-fn.def
> @@ -204,6 +204,8 @@ DEF_INTERNAL_OPTAB_FN (COND_FMS, ECF_CONST, cond_fms, 
> cond_ternary)
>  DEF_INTERNAL_OPTAB_FN (COND_FNMA, ECF_CONST, cond_fnma, cond_ternary)
>  DEF_INTERNAL_OPTAB_FN (COND_FNMS, ECF_CONST, cond_fnms, cond_ternary)
>  
> +DEF_INTERNAL_OPTAB_FN (COND_NEG, ECF_CONST | ECF_NOTHROW, cond_neg, 
> cond_unary)
> +
>  DEF_INTERNAL_OPTAB_FN (RSQRT, ECF_CONST, rsqrt, unary)
>  
>  DEF_INTERNAL_OPTAB_FN (REDUC_PLUS, ECF_CONST | ECF_NOTHROW,
> diff --git a/gcc/optabs.def b/gcc/optabs.def
> index 201b8aae1c0..fc65a3a7c23 100644
> --- a/gcc/optabs.def
> +++ b/gcc/optabs.def
> @@ -245,6 +245,7 @@ OPTAB_D (cond_fma_optab, "cond_fma$a")
>  OPTAB_D (cond_fms_optab, "cond_fms$a")
>  OPTAB_D (cond_fnma_optab, "cond_fnma$a")
>  OPTAB_D (cond_fnms_optab, "cond_fnms$a")
> +OPTAB_D (cond_neg_optab, "cond_neg$a")
>  OPTAB_D (cmov_optab, "cmov$a6")
>  OPTAB_D (cstore_optab, "cstore$a4")
>  OPTAB_D (ctrap_optab, "ctrap$a4")

Reply via email to