Pengfei Li <pengfei....@arm.com> writes:
> This patch transforms RTL expressions of the form (subreg (not X)) into
> (not (subreg X)) if the subreg is an operand of another binary logical
> operation. This transformation can expose opportunities to combine more
> logical operations.
>
> For example, it improves the codegen of the following AArch64 NEON
> intrinsics:
>   vandq_s64(vreinterpretq_s64_s32(vmvnq_s32(a)),
>             vreinterpretq_s64_s32(b));
> from:
>   not v0.16b, v0.16b
>   and v0.16b, v0.16b, v1.16b
> to:
>   bic v0.16b, v1.16b, v0.16b
>
> Regression tested on x86_64-linux-gnu, arm-linux-gnueabihf and
> aarch64-linux-gnu.
>
> gcc/ChangeLog:
>
>       * simplify-rtx.cc (non_paradoxical_subreg_not_p): New function
>       for pattern match of (subreg (not X)).
>       (simplify_with_subreg_not): New function for simplification.

LGTM, thanks. Pushed to trunk.

Richard

> ---
>  gcc/simplify-rtx.cc                           | 50 +++++++++++++++++++
>  .../gcc.target/aarch64/simd/bic_orn_1.c       | 17 +++++++
>  2 files changed, 67 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/bic_orn_1.c
>
> diff --git a/gcc/simplify-rtx.cc b/gcc/simplify-rtx.cc
> index 06b52ca8003..5a6c1a9c039 100644
> --- a/gcc/simplify-rtx.cc
> +++ b/gcc/simplify-rtx.cc
> @@ -3032,6 +3032,44 @@ match_plus_neg_pattern (rtx op0, rtx op1, machine_mode
> mode)
>    return false;
>  }
>
> +/* Check if OP matches the pattern of (subreg (not X)) and the subreg is
> +   non-paradoxical. */
> +
> +static bool
> +non_paradoxical_subreg_not_p (rtx op)
> +{
> +  return GET_CODE (op) == SUBREG
> +         && !paradoxical_subreg_p (op)
> +         && GET_CODE (SUBREG_REG (op)) == NOT;
> +}
> +
> +/* Convert (binop (subreg (not X)) Y) into (binop (not (subreg X)) Y), or
> +   (binop X (subreg (not Y))) into (binop X (not (subreg Y))) to expose
> +   opportunities to combine another binary logical operation with NOT. */
> +
> +static rtx
> +simplify_with_subreg_not (rtx_code binop, machine_mode mode, rtx op0, rtx
> op1)
> +{
> +  rtx opn = NULL_RTX;
> +  if (non_paradoxical_subreg_not_p (op0))
> +    opn = op0;
> +  else if (non_paradoxical_subreg_not_p (op1))
> +    opn = op1;
> +
> +  if (opn == NULL_RTX)
> +    return NULL_RTX;
> +
> +  rtx new_subreg = simplify_gen_subreg (mode,
> +                                        XEXP (SUBREG_REG (opn), 0),
> +                                        GET_MODE (SUBREG_REG (opn)),
> +                                        SUBREG_BYTE (opn));
> +  rtx new_not = simplify_gen_unary (NOT, mode, new_subreg, mode);
> +  if (opn == op0)
> +    return simplify_gen_binary (binop, mode, new_not, op1);
> +  else
> +    return simplify_gen_binary (binop, mode, op0, new_not);
> +}
> +
>  /* Subroutine of simplify_binary_operation. Simplify a binary operation
>     CODE with result mode MODE, operating on OP0 and OP1. If OP0 and/or
>     OP1 are constant pool references, TRUEOP0 and TRUEOP1 represent the
> @@ -3749,6 +3787,10 @@ simplify_context::simplify_binary_operation_1
> (rtx_code code,
>            && rtx_equal_p (XEXP (XEXP (op0, 0), 0), op1))
>          return simplify_gen_binary (IOR, mode, XEXP (op0, 1), op1);
>
> +      tem = simplify_with_subreg_not (code, mode, op0, op1);
> +      if (tem)
> +        return tem;
> +
>        tem = simplify_byte_swapping_operation (code, mode, op0, op1);
>        if (tem)
>          return tem;
> @@ -4017,6 +4059,10 @@ simplify_context::simplify_binary_operation_1
> (rtx_code code,
>            && rtx_equal_p (XEXP (XEXP (op0, 0), 0), op1))
>          return simplify_gen_binary (IOR, mode, XEXP (op0, 1), op1);
>
> +      tem = simplify_with_subreg_not (code, mode, op0, op1);
> +      if (tem)
> +        return tem;
> +
>        tem = simplify_byte_swapping_operation (code, mode, op0, op1);
>        if (tem)
>          return tem;
> @@ -4285,6 +4331,10 @@ simplify_context::simplify_binary_operation_1
> (rtx_code code,
>            return simplify_gen_binary (LSHIFTRT, mode, XEXP (op0, 0), XEXP
> (op0, 1));
>          }
>
> +      tem = simplify_with_subreg_not (code, mode, op0, op1);
> +      if (tem)
> +        return tem;
> +
>        tem = simplify_byte_swapping_operation (code, mode, op0, op1);
>        if (tem)
>          return tem;
> diff --git a/gcc/testsuite/gcc.target/aarch64/simd/bic_orn_1.c
> b/gcc/testsuite/gcc.target/aarch64/simd/bic_orn_1.c
> new file mode 100644
> index 00000000000..1c66f21424e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/simd/bic_orn_1.c
> @@ -0,0 +1,17 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +#include <arm_neon.h>
> +
> +int64x2_t bic_16b (int32x4_t a, int32x4_t b) {
> +  return vandq_s64 (vreinterpretq_s64_s32 (vmvnq_s32 (a)),
> +                    vreinterpretq_s64_s32 (b));
> +}
> +
> +int16x4_t orn_8b (int32x2_t a, int32x2_t b) {
> +  return vorr_s16 (vreinterpret_s16_s32 (a),
> +                   vreinterpret_s16_s32 (vmvn_s32 (b)));
> +}
> +
> +/* { dg-final { scan-assembler {\tbic\tv[0-9]+\.16b} } } */
> +/* { dg-final { scan-assembler {\torn\tv[0-9]+\.8b} } } */