On Fri, 8 Oct 2021 at 21:19, Richard Sandiford <richard.sandif...@arm.com> wrote: > > Thanks for looking at this. > > Prathamesh Kulkarni <prathamesh.kulka...@linaro.org> writes: > > Hi, > > As mentioned in PR, for the following test-case: > > > > typedef unsigned char uint8_t; > > > > static inline uint8_t > > x264_clip_uint8(uint8_t x) > > { > > uint8_t t = -x; > > uint8_t t1 = x & ~63; > > return (t1 != 0) ? t : x; > > } > > > > void > > mc_weight(uint8_t *restrict dst, uint8_t *restrict src, int n) > > { > > for (int x = 0; x < n*16; x++) > > dst[x] = x264_clip_uint8(src[x]); > > } > > > > -O3 -mcpu=generic+sve generates following code for the inner loop: > > > > .L3: > > ld1b z0.b, p0/z, [x1, x2] > > movprfx z2, z0 > > and z2.b, z2.b, #0xc0 > > movprfx z1, z0 > > neg z1.b, p1/m, z0.b > > cmpeq p2.b, p1/z, z2.b, #0 > > sel z0.b, p2, z0.b, z1.b > > st1b z0.b, p0, [x0, x2] > > add x2, x2, x4 > > whilelo p0.b, w2, w3 > > b.any .L3 > > > > The sel is redundant since we could conditionally negate z0 based on > > the predicate > > comparing z2 with 0. > > > > As suggested in the PR, the attached patch, introduces a new > > conditional internal function .COND_NEG, and in gimple-isel replaces > > the following sequence: > > op2 = -op1 > > op0 = A cmp B > > lhs = op0 ? op1 : op2 > > > > with: > > op0 = A inverted_cmp B > > lhs = .COND_NEG (op0, op1, op1). > > > > lhs = .COD_NEG (op0, op1, op1) > > implies > > lhs = neg (op1) if cond is true OR fall back to op1 if cond is false. > > > > With patch, it generates the following code-gen: > > .L3: > > ld1b z0.b, p0/z, [x1, x2] > > movprfx z1, z0 > > and z1.b, z1.b, #0xc0 > > cmpne p1.b, p2/z, z1.b, #0 > > neg z0.b, p1/m, z0.b > > st1b z0.b, p0, [x0, x2] > > add x2, x2, x4 > > whilelo p0.b, w2, w3 > > b.any .L3 > > > > While it seems to work for this test-case, I am not entirely sure if > > the patch is correct. Does it look in the right direction ? > > For binary ops we use match.pd rather than isel: > > (for uncond_op (UNCOND_BINARY) > cond_op (COND_BINARY) > (simplify > (vec_cond @0 (view_convert? (uncond_op@4 @1 @2)) @3) > (with { tree op_type = TREE_TYPE (@4); } > (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type) > && is_truth_type_for (op_type, TREE_TYPE (@0))) > (view_convert (cond_op @0 @1 @2 (view_convert:op_type @3)))))) > (simplify > (vec_cond @0 @1 (view_convert? (uncond_op@4 @2 @3))) > (with { tree op_type = TREE_TYPE (@4); } > (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type) > && is_truth_type_for (op_type, TREE_TYPE (@0))) > (view_convert (cond_op (bit_not @0) @2 @3 (view_convert:op_type @1))))))) > > I think it'd be good to do the same here, using new (UN)COND_UNARY > iterators. (The iterators will only have one value to start with, > but other unary ops could get the same treatment in future.) Thanks for the suggestions. The attached patch adds a pattern to match.pd to replace: cond = a cmp b r = cond ? x : -x with: cond = a inverted_cmp b r = cond ? -x : x
Code-gen with patch for inner loop: .L3: ld1b z0.b, p0/z, [x1, x2] movprfx z1, z0 and z1.b, z1.b, #0xc0 cmpne p1.b, p2/z, z1.b, #0 neg z0.b, p1/m, z0.b st1b z0.b, p0, [x0, x2] add x2, x2, x4 whilelo p0.b, w2, w3 b.any .L3 Does it look OK ? I didn't add it under (UN)COND_UNARY since it inverts the comparison, which we might not want to do for other unary ops ? Also, I am not sure, how to test if target supports conditional internal function ? I tried to use: (for cmp (tcc_comparison) icmp (inverted_tcc_comparison) (simplify (vec_cond (cmp@2 @0 @1) @3 (negate @3)) (with { auto op_type = TREE_TYPE (@2); } (if (vectorized_internal_fn_supported_p (IFN_COND_NEG, op_type) && is_truth_type_for (op_type, TREE_TYPE (@0))) (IFN_COND_NEG (icmp:op_type @0 @1) @3 @3))))) but both the conditions seem to fail. Thanks, Prathamesh > > Richard > > > > > > Thanks, > > Prathamesh > > > > diff --git a/gcc/gimple-isel.cc b/gcc/gimple-isel.cc > > index 38e90933c3e..5b0dd3c1993 100644 > > --- a/gcc/gimple-isel.cc > > +++ b/gcc/gimple-isel.cc > > @@ -39,6 +39,8 @@ along with GCC; see the file COPYING3. If not see > > #include "optabs.h" > > #include "gimple-fold.h" > > #include "internal-fn.h" > > +#include "fold-const.h" > > +#include "tree-pretty-print.h" > > > > /* Expand all ARRAY_REF(VIEW_CONVERT_EXPR) gimple assignments into calls to > > internal function based on vector type of selected expansion. > > @@ -203,6 +205,35 @@ gimple_expand_vec_cond_expr (gimple_stmt_iterator *gsi, > > return new_stmt; > > } > > > > + /* Replace: > > + op2 = -op1 > > + op0 = A cmp B > > + lhs = op0 ? op1 : op2 > > + > > + with: > > + op0 = A inverted_cmp B > > + lhs = .COND_NEG (op0, op1, op1). */ > > + > > + gassign *op1_def = nullptr; > > + if (TREE_CODE (op1) == SSA_NAME) > > + op1_def = static_cast<gassign *> (SSA_NAME_DEF_STMT (op1)); > > + > > + gassign *op2_def = nullptr; > > + if (TREE_CODE (op2) == SSA_NAME) > > + op2_def = static_cast<gassign *> (SSA_NAME_DEF_STMT (op2)); > > + > > + if (can_compute_op0 && op1_def && op2_def > > + && gimple_assign_rhs_code (op2_def) == NEGATE_EXPR > > + && operand_equal_p (gimple_assign_rhs1 (op2_def), op1, 0)) > > + { > > + auto inverted_code > > + = invert_tree_comparison (gimple_assign_rhs_code (def_stmt), > > true); > > + gimple_assign_set_rhs_code (def_stmt, inverted_code); > > + auto gsi2 = gsi_for_stmt (op2_def); > > + gsi_remove (&gsi2, true); > > + return gimple_build_call_internal (IFN_COND_NEG, 3, op0, op1, > > op1); > > + } > > + > > if (can_compute_op0 > > && used_vec_cond_exprs >= 2 > > && (get_vcond_mask_icode (mode, TYPE_MODE (op0_type)) > > diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c > > index 78db25bbac4..b57c7a4ed3e 100644 > > --- a/gcc/internal-fn.c > > +++ b/gcc/internal-fn.c > > @@ -3877,7 +3877,8 @@ static void (*const internal_fn_expanders[]) > > (internal_fn, gcall *) = { > > T (BIT_IOR_EXPR, IFN_COND_IOR) \ > > T (BIT_XOR_EXPR, IFN_COND_XOR) \ > > T (LSHIFT_EXPR, IFN_COND_SHL) \ > > - T (RSHIFT_EXPR, IFN_COND_SHR) > > + T (RSHIFT_EXPR, IFN_COND_SHR) \ > > + T (NEGATE_EXPR, IFN_COND_NEG) > > > > /* Return a function that only performs CODE when a certain condition is > > met > > and that uses a given fallback value otherwise. For example, if CODE is > > diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def > > index 88169ef4656..db40d1bfd18 100644 > > --- a/gcc/internal-fn.def > > +++ b/gcc/internal-fn.def > > @@ -204,6 +204,8 @@ DEF_INTERNAL_OPTAB_FN (COND_FMS, ECF_CONST, cond_fms, > > cond_ternary) > > DEF_INTERNAL_OPTAB_FN (COND_FNMA, ECF_CONST, cond_fnma, cond_ternary) > > DEF_INTERNAL_OPTAB_FN (COND_FNMS, ECF_CONST, cond_fnms, cond_ternary) > > > > +DEF_INTERNAL_OPTAB_FN (COND_NEG, ECF_CONST | ECF_NOTHROW, cond_neg, > > cond_unary) > > + > > DEF_INTERNAL_OPTAB_FN (RSQRT, ECF_CONST, rsqrt, unary) > > > > DEF_INTERNAL_OPTAB_FN (REDUC_PLUS, ECF_CONST | ECF_NOTHROW, > > diff --git a/gcc/optabs.def b/gcc/optabs.def > > index 201b8aae1c0..fc65a3a7c23 100644 > > --- a/gcc/optabs.def > > +++ b/gcc/optabs.def > > @@ -245,6 +245,7 @@ OPTAB_D (cond_fma_optab, "cond_fma$a") > > OPTAB_D (cond_fms_optab, "cond_fms$a") > > OPTAB_D (cond_fnma_optab, "cond_fnma$a") > > OPTAB_D (cond_fnms_optab, "cond_fnms$a") > > +OPTAB_D (cond_neg_optab, "cond_neg$a") > > OPTAB_D (cmov_optab, "cmov$a6") > > OPTAB_D (cstore_optab, "cstore$a4") > > OPTAB_D (ctrap_optab, "ctrap$a4")
pr93183-2.diff
Description: Binary data