Tamar Christina <tamar.christ...@arm.com> writes:
> Hi,
>
> Sending a new version of the patch because I noticed the pattern was 
> overriding the nor pattern.
>
> A second pattern is needed to capture the nor case as combine will match the
> longest sequence first.  So without this pattern we end up de-optimizing nor
> and emitting two nots instead.  I did not find a better way to do this.

Hmm, that's unfortunate.  But yeah, I don't know of a better way
of avoiding it either.

There's a risk we might need a pattern with the operands swapped
as well (so that the (not (reg …)) comes first) but it would be better
to avoid that using a new canonicalisation rule if necessary.

> Note: This patch series is working incrementally towards generating the most
>       efficient code for this and other loops in small steps.
>
> Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.
>
> Ok for master?
>
> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
>         * config/aarch64/aarch64-sve.md (*fcm<cmp_op><mode>_bic_combine,
>         *fcm<cmp_op><mode>_nor_combine, *fcmuo<mode>_bic_combine,
>         *fcmuo<mode>_nor_combine): New.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/aarch64/sve/pred-not-gen-1.c: New test.
>         * gcc.target/aarch64/sve/pred-not-gen-2.c: New test.
>         * gcc.target/aarch64/sve/pred-not-gen-3.c: New test.
>         * gcc.target/aarch64/sve/pred-not-gen-4.c: New test.

OK, thanks.

Richard

>
> --- inline copy of patch ---
>
> diff --git a/gcc/config/aarch64/aarch64-sve.md 
> b/gcc/config/aarch64/aarch64-sve.md
> index 
> 359fe0e457096cf4042a774789a5c241420703d3..8fe4c721313e70592d2cf0acbfbe2f07b070b51a
>  100644
> --- a/gcc/config/aarch64/aarch64-sve.md
> +++ b/gcc/config/aarch64/aarch64-sve.md
> @@ -8126,6 +8126,160 @@ (define_insn_and_split "*fcmuo<mode>_and_combine"
>           UNSPEC_COND_FCMUO))]
>  )
>
> +;; Similar to *fcm<cmp_op><mode>_and_combine, but for BIC rather than AND.
> +;; In this case, we still need a separate NOT/BIC operation, but predicating
> +;; the comparison on the BIC operand removes the need for a PTRUE.
> +(define_insn_and_split "*fcm<cmp_op><mode>_bic_combine"
> +  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
> +       (and:<VPRED>
> +         (and:<VPRED>
> +           (not:<VPRED>
> +             (unspec:<VPRED>
> +               [(match_operand:<VPRED> 1)
> +                (const_int SVE_KNOWN_PTRUE)
> +                (match_operand:SVE_FULL_F 2 "register_operand" "w")
> +                (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" 
> "wDz")]
> +               SVE_COND_FP_CMP_I0))
> +           (match_operand:<VPRED> 4 "register_operand" "Upa"))
> +         (match_dup:<VPRED> 1)))
> +   (clobber (match_scratch:<VPRED> 5 "=&Upl"))]
> +  "TARGET_SVE"
> +  "#"
> +  "&& 1"
> +  [(set (match_dup 5)
> +       (unspec:<VPRED>
> +         [(match_dup 4)
> +          (const_int SVE_MAYBE_NOT_PTRUE)
> +          (match_dup 2)
> +          (match_dup 3)]
> +         SVE_COND_FP_CMP_I0))
> +   (set (match_dup 0)
> +       (and:<VPRED>
> +         (not:<VPRED>
> +           (match_dup 5))
> +         (match_dup 4)))]
> +{
> +  if (can_create_pseudo_p ())
> +    operands[5] = gen_reg_rtx (<VPRED>mode);
> +}
> +)
> +
> +;; Make sure that we expand to a nor when the operand 4 of
> +;; *fcm<cmp_op><mode>_bic_combine is a not.
> +(define_insn_and_split "*fcm<cmp_op><mode>_nor_combine"
> +  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
> +       (and:<VPRED>
> +         (and:<VPRED>
> +           (not:<VPRED>
> +             (unspec:<VPRED>
> +               [(match_operand:<VPRED> 1)
> +                (const_int SVE_KNOWN_PTRUE)
> +                (match_operand:SVE_FULL_F 2 "register_operand" "w")
> +                (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" 
> "wDz")]
> +               SVE_COND_FP_CMP_I0))
> +           (not:<VPRED>
> +             (match_operand:<VPRED> 4 "register_operand" "Upa")))
> +         (match_dup:<VPRED> 1)))
> +   (clobber (match_scratch:<VPRED> 5 "=&Upl"))]
> +  "TARGET_SVE"
> +  "#"
> +  "&& 1"
> +  [(set (match_dup 5)
> +       (unspec:<VPRED>
> +         [(match_dup 1)
> +          (const_int SVE_KNOWN_PTRUE)
> +          (match_dup 2)
> +          (match_dup 3)]
> +         SVE_COND_FP_CMP_I0))
> +   (set (match_dup 0)
> +       (and:<VPRED>
> +         (and:<VPRED>
> +           (not:<VPRED>
> +             (match_dup 5))
> +           (not:<VPRED>
> +             (match_dup 4)))
> +         (match_dup 1)))]
> +{
> +  if (can_create_pseudo_p ())
> +    operands[5] = gen_reg_rtx (<VPRED>mode);
> +}
> +)
> +
> +(define_insn_and_split "*fcmuo<mode>_bic_combine"
> +  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
> +       (and:<VPRED>
> +         (and:<VPRED>
> +           (not:<VPRED>
> +             (unspec:<VPRED>
> +               [(match_operand:<VPRED> 1)
> +                (const_int SVE_KNOWN_PTRUE)
> +                (match_operand:SVE_FULL_F 2 "register_operand" "w")
> +                (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" 
> "wDz")]
> +               UNSPEC_COND_FCMUO))
> +           (match_operand:<VPRED> 4 "register_operand" "Upa"))
> +         (match_dup:<VPRED> 1)))
> +   (clobber (match_scratch:<VPRED> 5 "=&Upl"))]
> +  "TARGET_SVE"
> +  "#"
> +  "&& 1"
> +  [(set (match_dup 5)
> +       (unspec:<VPRED>
> +         [(match_dup 4)
> +          (const_int SVE_MAYBE_NOT_PTRUE)
> +          (match_dup 2)
> +          (match_dup 3)]
> +         UNSPEC_COND_FCMUO))
> +   (set (match_dup 0)
> +       (and:<VPRED>
> +         (not:<VPRED>
> +           (match_dup 5))
> +         (match_dup 4)))]
> +{
> +  if (can_create_pseudo_p ())
> +    operands[5] = gen_reg_rtx (<VPRED>mode);
> +}
> +)
> +
> +;; Same for unordered comparisons.
> +(define_insn_and_split "*fcmuo<mode>_nor_combine"
> +  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
> +       (and:<VPRED>
> +         (and:<VPRED>
> +           (not:<VPRED>
> +             (unspec:<VPRED>
> +               [(match_operand:<VPRED> 1)
> +                (const_int SVE_KNOWN_PTRUE)
> +                (match_operand:SVE_FULL_F 2 "register_operand" "w")
> +                (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" 
> "wDz")]
> +               UNSPEC_COND_FCMUO))
> +           (not:<VPRED>
> +             (match_operand:<VPRED> 4 "register_operand" "Upa")))
> +         (match_dup:<VPRED> 1)))
> +   (clobber (match_scratch:<VPRED> 5 "=&Upl"))]
> +  "TARGET_SVE"
> +  "#"
> +  "&& 1"
> +  [(set (match_dup 5)
> +       (unspec:<VPRED>
> +         [(match_dup 1)
> +          (const_int SVE_KNOWN_PTRUE)
> +          (match_dup 2)
> +          (match_dup 3)]
> +         UNSPEC_COND_FCMUO))
> +   (set (match_dup 0)
> +       (and:<VPRED>
> +         (and:<VPRED>
> +           (not:<VPRED>
> +             (match_dup 5))
> +           (not:<VPRED>
> +             (match_dup 4)))
> +         (match_dup 1)))]
> +{
> +  if (can_create_pseudo_p ())
> +    operands[5] = gen_reg_rtx (<VPRED>mode);
> +}
> +)
> +
>  ;; -------------------------------------------------------------------------
>  ;; ---- [FP] Absolute comparisons
>  ;; -------------------------------------------------------------------------
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-1.c 
> b/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-1.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..2c06564186c5a5e7917da475a9c201c81dfeb136
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-1.c
> @@ -0,0 +1,23 @@
> +/* { dg-do assemble { target aarch64_asm_sve_ok } } */
> +/* { dg-options "-O3 --save-temps" } */
> +
> +/*
> +** f10:
> +** ...
> +**     ld1d    z1.d, p0/z, \[x1, x5, lsl 3\]
> +**     fcmgt   p2.d, p0/z, z1.d, #0.0
> +**     ld1d    z2.d, p2/z, \[x2, x5, lsl 3\]
> +**     not     p1.b, p0/z, p2.b
> +**     ld1d    z0.d, p1/z, \[x3, x5, lsl 3\]
> +** ...
> +*/
> +
> +void f10(double * restrict z, double * restrict w, double * restrict x, 
> double * restrict y, int n)
> +{
> +    for (int i = 0; i < n; i++) {
> +        z[i] = (w[i] > 0) ? x[i] + w[i] : y[i] - w[i];
> +    }
> +}
> +
> +/* { dg-final { scan-assembler-not {\tbic\t} } } */
> +/* { dg-final { scan-assembler-times {\tnot\tp[0-9]+\.b, p[0-9]+/z, 
> p[0-9]+\.b\n} 1 } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-2.c 
> b/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-2.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..0c3b78d4c67455c971e94fb2ffdd7be2d4884864
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-2.c
> @@ -0,0 +1,23 @@
> +/* { dg-do assemble { target aarch64_asm_sve_ok } } */
> +/* { dg-options "-O3 --save-temps" } */
> +
> +/*
> +** f11:
> +** ...
> +**     ld1d    z0.d, p0/z, \[x1, x2, lsl 3\]
> +**     fcmgt   p2.d, p3/z, z0.d, #0.0
> +**     fcmgt   p1.d, p0/z, z0.d, #0.0
> +**     not     p1.b, p0/z, p1.b
> +**     ld1d    z1.d, p1/z, \[x3, x2, lsl 3\]
> +** ...
> +*/
> +
> +void f11(double * restrict z, double * restrict w, double * restrict x, 
> double * restrict y, int n)
> +{
> +    for (int i = 0; i < n; i++) {
> +        z[i] = (w[i] > 0) ? w[i] : y[i];
> +    }
> +}
> +
> +/* { dg-final { scan-assembler-not {\tbic\t} } } */
> +/* { dg-final { scan-assembler-times {\tnot\tp[0-9]+\.b, p[0-9]+/z, 
> p[0-9]+\.b\n} 1 } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-3.c 
> b/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-3.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..248f8ab57191ce8a1d4c334533de8bc76aa07691
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-3.c
> @@ -0,0 +1,21 @@
> +/* { dg-do assemble { target aarch64_asm_sve_ok } } */
> +/* { dg-options "-O3 --save-temps" } */
> +
> +/*
> +** f12:
> +** ...
> +**     ld1w    z1.s, p0/z, \[x1, x2, lsl 2\]
> +**     cmple   p1.s, p0/z, z1.s, #0
> +**     ld1w    z0.s, p1/z, \[x3, x2, lsl 2\]
> +** ...
> +*/
> +
> +void f12(int * restrict z, int * restrict w, int * restrict x, int * 
> restrict y, int n)
> +{
> +    for (int i = 0; i < n; i++) {
> +        z[i] = (w[i] > 0) ? w[i] : y[i];
> +    }
> +}
> +
> +/* { dg-final { scan-assembler-not {\tbic\t} } } */
> +/* { dg-final { scan-assembler-not {\tnot\tp[0-9]+\.b, p[0-9]+/z, 
> p[0-9]+\.b\n} } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-4.c 
> b/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-4.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..96200309880a91ad1db5801115c911cfdce06125
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-4.c
> @@ -0,0 +1,14 @@
> +/* { dg-do assemble { target aarch64_asm_sve_ok } } */
> +/* { dg-options "-O3 --save-temps" } */
> +
> +#include <math.h>
> +
> +void f13(double * restrict z, double * restrict w, double * restrict x, 
> double * restrict y, int n)
> +{
> +    for (int i = 0; i < n; i++) {
> +        z[i] = (isunordered(w[i], 0)) ? x[i] + w[i] : y[i] - w[i];
> +    }
> +}
> +
> +/* { dg-final { scan-assembler-not {\tbic\t} } } */
> +/* { dg-final { scan-assembler-times {\tnot\tp[0-9]+\.b, p[0-9]+/z, 
> p[0-9]+\.b\n} 1 } } */

Reply via email to