From 5ef38e6ce210cb54f1fe1bb3ca78fde1db76f35a Mon Sep 17 00:00:00 2001
From: Kyrylo Tkachov <ktkachov@nvidia.com>
Date: Fri, 11 Jul 2025 07:23:16 -0700
Subject: [PATCH] aarch64: Use SVE2 BSL2N for vector EON

SVE2 BSL2N (x, y, z) = (x & z) | (~y & ~z). When x == y this computes:
(x & z) | (~x & ~z) which is ~(x ^ z).
Thus, we can use it to match RTL patterns (not (xor (...) (...))) for both
Advanced SIMD and SVE modes when TARGET_SVE2.
This patch does that.
For code like:

uint64x2_t eon_q(uint64x2_t a, uint64x2_t b) { return EON(a, b); }
svuint64_t eon_z(svuint64_t a, svuint64_t b) { return EON(a, b); }

We now generate:
eon_q:
        bsl2n   z0.d, z0.d, z0.d, z1.d
        ret

eon_z:
        bsl2n   z0.d, z0.d, z0.d, z1.d
        ret

instead of the previous:
eon_q:
        eor     v0.16b, v0.16b, v1.16b
        not     v0.16b, v0.16b
        ret

eon_z:
        eor     z0.d, z0.d, z1.d
        ptrue   p3.b, all
        not     z0.d, p3/m, z0.d
        ret

Bootstrapped and tested on aarch64-none-linux-gnu.

Signed-off-by: Kyrylo Tkachov <ktkachov@nvidia.com>

gcc/

	* config/aarch64/aarch64-sve2.md (*aarch64_sve2_bsl2n_eon<mode>):
	New pattern.
	(*aarch64_sve2_eon_bsl2n_unpred<mode>): Likewise.

gcc/testsuite/

	* gcc.target/aarch64/sve2/eon_bsl2n.c: New test.
---
 gcc/config/aarch64/aarch64-sve2.md            | 34 ++++++++++++
 .../gcc.target/aarch64/sve2/eon_bsl2n.c       | 52 +++++++++++++++++++
 2 files changed, 86 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/eon_bsl2n.c

diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md
index 3d460c73ae7..28bd680b5d5 100644
--- a/gcc/config/aarch64/aarch64-sve2.md
+++ b/gcc/config/aarch64/aarch64-sve2.md
@@ -2053,6 +2053,40 @@
   }
 )
 
+;; Vector EON (~(x, y)) using BSL2N.
+(define_insn_and_rewrite "*aarch64_sve2_bsl2n_eon<mode>"
+  [(set (match_operand:SVE_FULL_I 0 "register_operand")
+	(unspec:SVE_FULL_I
+	  [(match_operand 3)
+	   (not:SVE_FULL_I
+	     (xor:SVE_FULL_I
+		(match_operand:SVE_FULL_I 1 "register_operand")
+		(match_operand:SVE_FULL_I 2 "register_operand")))]
+	    UNSPEC_PRED_X))]
+  "TARGET_SVE2"
+  {@ [ cons: =0, 1, 2 ; attrs: movprfx ]
+     [ w  ,      0, w ; *              ] bsl2n\t%0.d, %0.d, %0.d, %2.d
+     [ ?&w,      w, w ; yes            ] movprfx\t%0, %1\;bsl2n\t%0.d, %0.d, %1.d, %2.d
+  }
+  "&& !CONSTANT_P (operands[3])"
+  {
+    operands[3] = CONSTM1_RTX (<VPRED>mode);
+  }
+)
+
+(define_insn "*aarch64_sve2_eon_bsl2n_unpred<mode>"
+  [(set (match_operand:VDQ_I 0 "register_operand")
+       (not:VDQ_I
+         (xor:VDQ_I
+           (match_operand:VDQ_I 1 "register_operand")
+           (match_operand:VDQ_I 2 "register_operand"))))]
+  "TARGET_SVE2"
+  {@ [ cons: =0, 1, 2 ; attrs: movprfx ]
+     [ w  ,      0, w ; *              ] bsl2n\t%Z0.d, %Z0.d, %Z0.d, %Z2.d
+     [ ?&w,      w, w ; yes            ] movprfx\t%Z0, %Z1\;bsl2n\t%Z0.d, %Z0.d, %Z1.d, %Z2.d
+  }
+)
+
 ;; -------------------------------------------------------------------------
 ;; ---- [INT] Shift-and-accumulate operations
 ;; -------------------------------------------------------------------------
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/eon_bsl2n.c b/gcc/testsuite/gcc.target/aarch64/sve2/eon_bsl2n.c
new file mode 100644
index 00000000000..74b46376373
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/eon_bsl2n.c
@@ -0,0 +1,52 @@
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#include <arm_neon.h>
+#include <arm_sve.h>
+
+#define EON(x, y)   (~((x) ^ (y)))
+
+/*
+** eon_d:
+** 	bsl2n	z0.d, z0.d, z0.d, z1.d
+** 	ret
+*/
+uint32x2_t eon_d(uint32x2_t a, uint32x2_t b) { return EON(a, b); }
+
+/*
+** eon_d_mp:
+**	movprfx	z0, z1
+** 	bsl2n	z0.d, z0.d, z1.d, z2.d
+** 	ret
+*/
+uint32x2_t eon_d_mp(uint32x2_t c, uint32x2_t a, uint32x2_t b) { return EON(a, b); }
+
+/*
+** eon_q:
+** 	bsl2n	z0.d, z0.d, z0.d, z1.d
+** 	ret
+*/
+uint64x2_t eon_q(uint64x2_t a, uint64x2_t b) { return EON(a, b); }
+
+/*
+** eon_q_mp:
+**	movprfx	z0, z1
+** 	bsl2n	z0.d, z0.d, z1.d, z2.d
+** 	ret
+*/
+uint64x2_t eon_q_mp(uint64x2_t c, uint64x2_t a, uint64x2_t b) { return EON(a, b); }
+
+/*
+** eon_z:
+** 	bsl2n	z0.d, z0.d, z0.d, z1.d
+** 	ret
+*/
+svuint64_t eon_z(svuint64_t a, svuint64_t b) { return EON(a, b); }
+
+/*
+** eon_z_mp:
+**	movprfx	z0, z1
+** 	bsl2n	z0.d, z0.d, z1.d, z2.d
+** 	ret
+*/
+svuint64_t eon_z_mp(svuint64_t c, svuint64_t a, svuint64_t b) { return EON(a, b); }
-- 
2.44.0

