Pengfei Li <pengfei....@arm.com> writes:
> This patch transforms RTL expressions of the form (subreg (not X)) into
> (not (subreg X)) if the subreg is an operand of another binary logical
> operation. This transformation can expose opportunities to combine more
> logical operations.
>
> For example, it improves the codegen of the following AArch64 NEON
> intrinsics:
>       vandq_s64(vreinterpretq_s64_s32(vmvnq_s32(a)),
>                 vreinterpretq_s64_s32(b));
> from:
>       not     v0.16b, v0.16b
>       and     v0.16b, v0.16b, v1.16b
> to:
>       bic     v0.16b, v1.16b, v0.16b
>
> Regression tested on x86_64-linux-gnu, arm-linux-gnueabihf and
> aarch64-linux-gnu.
>
> gcc/ChangeLog:
>
>       * simplify-rtx.cc (non_paradoxical_subreg_not_p): New function
>       for pattern match of (subreg (not X)).
>       (simplify_with_subreg_not): New function for simplification.

LGTM, thanks.  Pushed to trunk.

Richard

> ---
>  gcc/simplify-rtx.cc                           | 50 +++++++++++++++++++
>  .../gcc.target/aarch64/simd/bic_orn_1.c       | 17 +++++++
>  2 files changed, 67 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/bic_orn_1.c
>
> diff --git a/gcc/simplify-rtx.cc b/gcc/simplify-rtx.cc
> index 06b52ca8003..5a6c1a9c039 100644
> --- a/gcc/simplify-rtx.cc
> +++ b/gcc/simplify-rtx.cc
> @@ -3032,6 +3032,44 @@ match_plus_neg_pattern (rtx op0, rtx op1, machine_mode mode)
>    return false;
>  }
>  
> +/* Check if OP matches the pattern of (subreg (not X)) and the subreg is
> +   non-paradoxical.  */
> +
> +static bool
> +non_paradoxical_subreg_not_p (rtx op)
> +{
> +  return GET_CODE (op) == SUBREG
> +      && !paradoxical_subreg_p (op)
> +      && GET_CODE (SUBREG_REG (op)) == NOT;
> +}
> +
> +/* Convert (binop (subreg (not X)) Y) into (binop (not (subreg X)) Y), or
> +   (binop X (subreg (not Y))) into (binop X (not (subreg Y))) to expose
> +   opportunities to combine another binary logical operation with NOT.  */
> +
> +static rtx
> +simplify_with_subreg_not (rtx_code binop, machine_mode mode, rtx op0, rtx op1)
> +{
> +  rtx opn = NULL_RTX;
> +  if (non_paradoxical_subreg_not_p (op0))
> +    opn = op0;
> +  else if (non_paradoxical_subreg_not_p (op1))
> +    opn = op1;
> +
> +  if (opn == NULL_RTX)
> +    return NULL_RTX;
> +
> +  rtx new_subreg = simplify_gen_subreg (mode,
> +                                     XEXP (SUBREG_REG (opn), 0),
> +                                     GET_MODE (SUBREG_REG (opn)),
> +                                     SUBREG_BYTE (opn));
> +  rtx new_not = simplify_gen_unary (NOT, mode, new_subreg, mode);
> +  if (opn == op0)
> +    return simplify_gen_binary (binop, mode, new_not, op1);
> +  else
> +    return simplify_gen_binary (binop, mode, op0, new_not);
> +}
> +
>  /* Subroutine of simplify_binary_operation.  Simplify a binary operation
>     CODE with result mode MODE, operating on OP0 and OP1.  If OP0 and/or
>     OP1 are constant pool references, TRUEOP0 and TRUEOP1 represent the
> @@ -3749,6 +3787,10 @@ simplify_context::simplify_binary_operation_1 (rtx_code code,
>         && rtx_equal_p (XEXP (XEXP (op0, 0), 0), op1))
>       return simplify_gen_binary (IOR, mode, XEXP (op0, 1), op1);
>  
> +      tem = simplify_with_subreg_not (code, mode, op0, op1);
> +      if (tem)
> +     return tem;
> +
>        tem = simplify_byte_swapping_operation (code, mode, op0, op1);
>        if (tem)
>       return tem;
> @@ -4017,6 +4059,10 @@ simplify_context::simplify_binary_operation_1 (rtx_code code,
>         && rtx_equal_p (XEXP (XEXP (op0, 0), 0), op1))
>       return simplify_gen_binary (IOR, mode, XEXP (op0, 1), op1);
>  
> +      tem = simplify_with_subreg_not (code, mode, op0, op1);
> +      if (tem)
> +     return tem;
> +
>        tem = simplify_byte_swapping_operation (code, mode, op0, op1);
>        if (tem)
>       return tem;
> @@ -4285,6 +4331,10 @@ simplify_context::simplify_binary_operation_1 (rtx_code code,
>           return simplify_gen_binary (LSHIFTRT, mode, XEXP (op0, 0), XEXP (op0, 1));
>       }
>  
> +      tem = simplify_with_subreg_not (code, mode, op0, op1);
> +      if (tem)
> +     return tem;
> +
>        tem = simplify_byte_swapping_operation (code, mode, op0, op1);
>        if (tem)
>       return tem;
> diff --git a/gcc/testsuite/gcc.target/aarch64/simd/bic_orn_1.c b/gcc/testsuite/gcc.target/aarch64/simd/bic_orn_1.c
> new file mode 100644
> index 00000000000..1c66f21424e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/simd/bic_orn_1.c
> @@ -0,0 +1,17 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +#include <arm_neon.h>
> +
> +int64x2_t bic_16b (int32x4_t a, int32x4_t b) {
> +  return vandq_s64 (vreinterpretq_s64_s32 (vmvnq_s32 (a)),
> +                 vreinterpretq_s64_s32 (b));
> +}
> +
> +int16x4_t orn_8b (int32x2_t a, int32x2_t b) {
> +  return vorr_s16 (vreinterpret_s16_s32 (a),
> +                vreinterpret_s16_s32 (vmvn_s32 (b)));
> +}
> +
> +/* { dg-final { scan-assembler {\tbic\tv[0-9]+\.16b} } } */
> +/* { dg-final { scan-assembler {\torn\tv[0-9]+\.8b} } } */

Reply via email to