Prathamesh Kulkarni <prathamesh.kulka...@linaro.org> writes: > On Fri, 8 Oct 2021 at 21:19, Richard Sandiford > <richard.sandif...@arm.com> wrote: >> >> Thanks for looking at this. >> >> Prathamesh Kulkarni <prathamesh.kulka...@linaro.org> writes: >> > Hi, >> > As mentioned in PR, for the following test-case: >> > >> > typedef unsigned char uint8_t; >> > >> > static inline uint8_t >> > x264_clip_uint8(uint8_t x) >> > { >> > uint8_t t = -x; >> > uint8_t t1 = x & ~63; >> > return (t1 != 0) ? t : x; >> > } >> > >> > void >> > mc_weight(uint8_t *restrict dst, uint8_t *restrict src, int n) >> > { >> > for (int x = 0; x < n*16; x++) >> > dst[x] = x264_clip_uint8(src[x]); >> > } >> > >> > -O3 -mcpu=generic+sve generates following code for the inner loop: >> > >> > .L3: >> > ld1b z0.b, p0/z, [x1, x2] >> > movprfx z2, z0 >> > and z2.b, z2.b, #0xc0 >> > movprfx z1, z0 >> > neg z1.b, p1/m, z0.b >> > cmpeq p2.b, p1/z, z2.b, #0 >> > sel z0.b, p2, z0.b, z1.b >> > st1b z0.b, p0, [x0, x2] >> > add x2, x2, x4 >> > whilelo p0.b, w2, w3 >> > b.any .L3 >> > >> > The sel is redundant since we could conditionally negate z0 based on >> > the predicate >> > comparing z2 with 0. >> > >> > As suggested in the PR, the attached patch, introduces a new >> > conditional internal function .COND_NEG, and in gimple-isel replaces >> > the following sequence: >> > op2 = -op1 >> > op0 = A cmp B >> > lhs = op0 ? op1 : op2 >> > >> > with: >> > op0 = A inverted_cmp B >> > lhs = .COND_NEG (op0, op1, op1). >> > >> > lhs = .COD_NEG (op0, op1, op1) >> > implies >> > lhs = neg (op1) if cond is true OR fall back to op1 if cond is false. >> > >> > With patch, it generates the following code-gen: >> > .L3: >> > ld1b z0.b, p0/z, [x1, x2] >> > movprfx z1, z0 >> > and z1.b, z1.b, #0xc0 >> > cmpne p1.b, p2/z, z1.b, #0 >> > neg z0.b, p1/m, z0.b >> > st1b z0.b, p0, [x0, x2] >> > add x2, x2, x4 >> > whilelo p0.b, w2, w3 >> > b.any .L3 >> > >> > While it seems to work for this test-case, I am not entirely sure if >> > the patch is correct. Does it look in the right direction ? >> >> For binary ops we use match.pd rather than isel: >> >> (for uncond_op (UNCOND_BINARY) >> cond_op (COND_BINARY) >> (simplify >> (vec_cond @0 (view_convert? (uncond_op@4 @1 @2)) @3) >> (with { tree op_type = TREE_TYPE (@4); } >> (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), >> op_type) >> && is_truth_type_for (op_type, TREE_TYPE (@0))) >> (view_convert (cond_op @0 @1 @2 (view_convert:op_type @3)))))) >> (simplify >> (vec_cond @0 @1 (view_convert? (uncond_op@4 @2 @3))) >> (with { tree op_type = TREE_TYPE (@4); } >> (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), >> op_type) >> && is_truth_type_for (op_type, TREE_TYPE (@0))) >> (view_convert (cond_op (bit_not @0) @2 @3 (view_convert:op_type @1))))))) >> >> I think it'd be good to do the same here, using new (UN)COND_UNARY >> iterators. (The iterators will only have one value to start with, >> but other unary ops could get the same treatment in future.) > Thanks for the suggestions. > The attached patch adds a pattern to match.pd to replace: > cond = a cmp b > r = cond ? x : -x > with: > cond = a inverted_cmp b > r = cond ? -x : x > > Code-gen with patch for inner loop: > .L3: > ld1b z0.b, p0/z, [x1, x2] > movprfx z1, z0 > and z1.b, z1.b, #0xc0 > cmpne p1.b, p2/z, z1.b, #0 > neg z0.b, p1/m, z0.b > st1b z0.b, p0, [x0, x2] > add x2, x2, x4 > whilelo p0.b, w2, w3 > b.any .L3 > > Does it look OK ? > I didn't add it under (UN)COND_UNARY since it inverts the comparison, > which we might not want to do for other unary ops ?
I think we should follow the structure of the current binary and ternary patterns: cope with unary operations in either arm of the vec_cond and use bit_not for the case in which the unary operation is in the “false” arm of the vec_cond. The bit_not will be folded away if the comparison can be inverted, but it will be left in-place if the comparison can't be inverted (as for some FP comparisons). Thanks, Richard > > Also, I am not sure, how to test if target supports conditional > internal function ? > I tried to use: > (for cmp (tcc_comparison) > icmp (inverted_tcc_comparison) > (simplify > (vec_cond (cmp@2 @0 @1) @3 (negate @3)) > (with { auto op_type = TREE_TYPE (@2); } > (if (vectorized_internal_fn_supported_p (IFN_COND_NEG, op_type) > && is_truth_type_for (op_type, TREE_TYPE (@0))) > (IFN_COND_NEG (icmp:op_type @0 @1) @3 @3))))) > > but both the conditions seem to fail. > > Thanks, > Prathamesh > > >> >> Richard >> >> >> > >> > Thanks, >> > Prathamesh >> > >> > diff --git a/gcc/gimple-isel.cc b/gcc/gimple-isel.cc >> > index 38e90933c3e..5b0dd3c1993 100644 >> > --- a/gcc/gimple-isel.cc >> > +++ b/gcc/gimple-isel.cc >> > @@ -39,6 +39,8 @@ along with GCC; see the file COPYING3. If not see >> > #include "optabs.h" >> > #include "gimple-fold.h" >> > #include "internal-fn.h" >> > +#include "fold-const.h" >> > +#include "tree-pretty-print.h" >> > >> > /* Expand all ARRAY_REF(VIEW_CONVERT_EXPR) gimple assignments into calls >> > to >> > internal function based on vector type of selected expansion. >> > @@ -203,6 +205,35 @@ gimple_expand_vec_cond_expr (gimple_stmt_iterator >> > *gsi, >> > return new_stmt; >> > } >> > >> > + /* Replace: >> > + op2 = -op1 >> > + op0 = A cmp B >> > + lhs = op0 ? op1 : op2 >> > + >> > + with: >> > + op0 = A inverted_cmp B >> > + lhs = .COND_NEG (op0, op1, op1). */ >> > + >> > + gassign *op1_def = nullptr; >> > + if (TREE_CODE (op1) == SSA_NAME) >> > + op1_def = static_cast<gassign *> (SSA_NAME_DEF_STMT (op1)); >> > + >> > + gassign *op2_def = nullptr; >> > + if (TREE_CODE (op2) == SSA_NAME) >> > + op2_def = static_cast<gassign *> (SSA_NAME_DEF_STMT (op2)); >> > + >> > + if (can_compute_op0 && op1_def && op2_def >> > + && gimple_assign_rhs_code (op2_def) == NEGATE_EXPR >> > + && operand_equal_p (gimple_assign_rhs1 (op2_def), op1, 0)) >> > + { >> > + auto inverted_code >> > + = invert_tree_comparison (gimple_assign_rhs_code (def_stmt), >> > true); >> > + gimple_assign_set_rhs_code (def_stmt, inverted_code); >> > + auto gsi2 = gsi_for_stmt (op2_def); >> > + gsi_remove (&gsi2, true); >> > + return gimple_build_call_internal (IFN_COND_NEG, 3, op0, op1, >> > op1); >> > + } >> > + >> > if (can_compute_op0 >> > && used_vec_cond_exprs >= 2 >> > && (get_vcond_mask_icode (mode, TYPE_MODE (op0_type)) >> > diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c >> > index 78db25bbac4..b57c7a4ed3e 100644 >> > --- a/gcc/internal-fn.c >> > +++ b/gcc/internal-fn.c >> > @@ -3877,7 +3877,8 @@ static void (*const internal_fn_expanders[]) >> > (internal_fn, gcall *) = { >> > T (BIT_IOR_EXPR, IFN_COND_IOR) \ >> > T (BIT_XOR_EXPR, IFN_COND_XOR) \ >> > T (LSHIFT_EXPR, IFN_COND_SHL) \ >> > - T (RSHIFT_EXPR, IFN_COND_SHR) >> > + T (RSHIFT_EXPR, IFN_COND_SHR) \ >> > + T (NEGATE_EXPR, IFN_COND_NEG) >> > >> > /* Return a function that only performs CODE when a certain condition is >> > met >> > and that uses a given fallback value otherwise. For example, if CODE >> > is >> > diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def >> > index 88169ef4656..db40d1bfd18 100644 >> > --- a/gcc/internal-fn.def >> > +++ b/gcc/internal-fn.def >> > @@ -204,6 +204,8 @@ DEF_INTERNAL_OPTAB_FN (COND_FMS, ECF_CONST, cond_fms, >> > cond_ternary) >> > DEF_INTERNAL_OPTAB_FN (COND_FNMA, ECF_CONST, cond_fnma, cond_ternary) >> > DEF_INTERNAL_OPTAB_FN (COND_FNMS, ECF_CONST, cond_fnms, cond_ternary) >> > >> > +DEF_INTERNAL_OPTAB_FN (COND_NEG, ECF_CONST | ECF_NOTHROW, cond_neg, >> > cond_unary) >> > + >> > DEF_INTERNAL_OPTAB_FN (RSQRT, ECF_CONST, rsqrt, unary) >> > >> > DEF_INTERNAL_OPTAB_FN (REDUC_PLUS, ECF_CONST | ECF_NOTHROW, >> > diff --git a/gcc/optabs.def b/gcc/optabs.def >> > index 201b8aae1c0..fc65a3a7c23 100644 >> > --- a/gcc/optabs.def >> > +++ b/gcc/optabs.def >> > @@ -245,6 +245,7 @@ OPTAB_D (cond_fma_optab, "cond_fma$a") >> > OPTAB_D (cond_fms_optab, "cond_fms$a") >> > OPTAB_D (cond_fnma_optab, "cond_fnma$a") >> > OPTAB_D (cond_fnms_optab, "cond_fnms$a") >> > +OPTAB_D (cond_neg_optab, "cond_neg$a") >> > OPTAB_D (cmov_optab, "cmov$a6") >> > OPTAB_D (cstore_optab, "cstore$a4") >> > OPTAB_D (ctrap_optab, "ctrap$a4") > > diff --git a/gcc/gimple-match-head.c b/gcc/gimple-match-head.c > index 7112c116835..90cf131af48 100644 > --- a/gcc/gimple-match-head.c > +++ b/gcc/gimple-match-head.c > @@ -878,6 +878,10 @@ try_conditional_simplification (internal_fn ifn, > gimple_match_op *res_op, > if (!gimple_resimplify3 (seq, &cond_op, valueize)) > return false; > break; > + case 1: > + if (!gimple_resimplify1 (seq, &cond_op, valueize)) > + return false; > + break; > default: > gcc_unreachable (); > } > diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c > index 78db25bbac4..b57c7a4ed3e 100644 > --- a/gcc/internal-fn.c > +++ b/gcc/internal-fn.c > @@ -3877,7 +3877,8 @@ static void (*const internal_fn_expanders[]) > (internal_fn, gcall *) = { > T (BIT_IOR_EXPR, IFN_COND_IOR) \ > T (BIT_XOR_EXPR, IFN_COND_XOR) \ > T (LSHIFT_EXPR, IFN_COND_SHL) \ > - T (RSHIFT_EXPR, IFN_COND_SHR) > + T (RSHIFT_EXPR, IFN_COND_SHR) \ > + T (NEGATE_EXPR, IFN_COND_NEG) > > /* Return a function that only performs CODE when a certain condition is met > and that uses a given fallback value otherwise. For example, if CODE is > diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def > index 88169ef4656..db40d1bfd18 100644 > --- a/gcc/internal-fn.def > +++ b/gcc/internal-fn.def > @@ -204,6 +204,8 @@ DEF_INTERNAL_OPTAB_FN (COND_FMS, ECF_CONST, cond_fms, > cond_ternary) > DEF_INTERNAL_OPTAB_FN (COND_FNMA, ECF_CONST, cond_fnma, cond_ternary) > DEF_INTERNAL_OPTAB_FN (COND_FNMS, ECF_CONST, cond_fnms, cond_ternary) > > +DEF_INTERNAL_OPTAB_FN (COND_NEG, ECF_CONST | ECF_NOTHROW, cond_neg, > cond_unary) > + > DEF_INTERNAL_OPTAB_FN (RSQRT, ECF_CONST, rsqrt, unary) > > DEF_INTERNAL_OPTAB_FN (REDUC_PLUS, ECF_CONST | ECF_NOTHROW, > diff --git a/gcc/match.pd b/gcc/match.pd > index a9791ceb74a..fde6d32b2c4 100644 > --- a/gcc/match.pd > +++ b/gcc/match.pd > @@ -7037,6 +7037,24 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > wi::mask (tree_to_uhwi (@1), > false, prec)); }) > { build_zero_cst (TREE_TYPE (@0)); })))))))) > +#if GIMPLE > + > +/* Simplify: > + > + cond = a cmp b > + r = cond ? x : -x > + > + to: > + > + cond = a inverted_cmp b > + r = cond ? -x : x. */ > + > +(for cmp (tcc_comparison) > + icmp (inverted_tcc_comparison) > + (simplify > + (vec_cond (cmp@2 @0 @1) @3 (negate @3)) > + (with { auto op_type = TREE_TYPE (@2); } > + (IFN_COND_NEG (icmp:op_type @0 @1) @3 @3)))) > > /* Simplify: > > @@ -7056,7 +7074,6 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > conditional internal functions must support the same comparisons > inside and outside a VEC_COND_EXPR. */ > > -#if GIMPLE > (for uncond_op (UNCOND_BINARY) > cond_op (COND_BINARY) > (simplify > diff --git a/gcc/optabs.def b/gcc/optabs.def > index 201b8aae1c0..fc65a3a7c23 100644 > --- a/gcc/optabs.def > +++ b/gcc/optabs.def > @@ -245,6 +245,7 @@ OPTAB_D (cond_fma_optab, "cond_fma$a") > OPTAB_D (cond_fms_optab, "cond_fms$a") > OPTAB_D (cond_fnma_optab, "cond_fnma$a") > OPTAB_D (cond_fnms_optab, "cond_fnms$a") > +OPTAB_D (cond_neg_optab, "cond_neg$a") > OPTAB_D (cmov_optab, "cmov$a6") > OPTAB_D (cstore_optab, "cstore$a4") > OPTAB_D (ctrap_optab, "ctrap$a4")