Kyrylo Tkachov <ktkac...@nvidia.com> writes:
> Hi all,
>
> The MD pattern for the XAR instruction in SVE2 is currently expressed with
> non-canonical RTL by using a ROTATERT code with a constant rotate amount.
> Fix it by using the left ROTATE code.  This necessitates adjusting the rotate
> amount during expand.
>
> Additionally, as the SVE2 XAR instruction is unpredicated and can handle all
> element sizes from .b to .d, it is a good fit for implementing the XOR+ROTATE
> operation for Advanced SIMD modes where the TARGET_SHA3 instruction cannot be
> used (it can only handle V2DImode operands).  Therefore let's extend the
> accepted modes of the SVE2 pattern to include the Advanced SIMD integer modes.
>
> This causes some tests for the svxar* intrinsics to fail because they now
> simplify to a plain EOR when the rotate amount is the width of the element.
> This simplification is desirable (EOR instructions have better or equal
> throughput than XAR, and they are non-destructive of their input) so the
> tests are adjusted.
>
> For V2DImode XAR operations we should prefer the Advanced SIMD version when
> it is available (TARGET_SHA3) because it is non-destructive, so restrict the
> SVE2 pattern accordingly.  Tests are added to confirm this.
>
> Bootstrapped and tested on aarch64-none-linux-gnu.
> Ok for mainline?
> Thanks,
> Kyrill
>
> Signed-off-by: Kyrylo Tkachov <ktkac...@nvidia.com>
>
> gcc/
>
> 	* config/aarch64/iterators.md (SVE_ASIMD_FULL_I): New mode iterator.
> 	* config/aarch64/aarch64-sve2.md (@aarch64_sve2_xar<mode>):
> 	Use SVE_ASIMD_FULL_I modes.  Use ROTATE code for the rotate step.
> 	Adjust output logic.
> 	* config/aarch64/aarch64-sve-builtins-sve2.cc (svxar_impl): Define.
> 	(svxar): Use the above.
>
> gcc/testsuite/
>
> 	* gcc.target/aarch64/xar_neon_modes.c: New test.
> 	* gcc.target/aarch64/xar_v2di_nonsve.c: Likewise.
> 	* gcc.target/aarch64/sve2/acle/asm/xar_s16.c: Scan for EOR rather than
> 	XAR.
> 	* gcc.target/aarch64/sve2/acle/asm/xar_s32.c: Likewise.
> 	* gcc.target/aarch64/sve2/acle/asm/xar_s64.c: Likewise.
> 	* gcc.target/aarch64/sve2/acle/asm/xar_s8.c: Likewise.
> 	* gcc.target/aarch64/sve2/acle/asm/xar_u16.c: Likewise.
> 	* gcc.target/aarch64/sve2/acle/asm/xar_u32.c: Likewise.
> 	* gcc.target/aarch64/sve2/acle/asm/xar_u64.c: Likewise.
> 	* gcc.target/aarch64/sve2/acle/asm/xar_u8.c: Likewise.
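The cover letter leans on two rotate identities, shown here in a standalone
scalar C sketch (illustrative only, not taken from the patch): a right-rotate
by r is the same as a left-rotate by bits - r, which is the adjustment made
when expanding to the left-ROTATE form, and a rotate by the full element
width is the identity, which is why an XAR whose rotate amount equals the
element size folds to a plain EOR.

/* Illustrative sketch only; 32-bit scalars stand in for vector elements.  */
#include <assert.h>
#include <stdint.h>

/* UB-free rotate helpers.  */
static uint32_t rotr32 (uint32_t x, unsigned r)
{ return (x >> (r & 31)) | (x << (-r & 31)); }
static uint32_t rotl32 (uint32_t x, unsigned r)
{ return (x << (r & 31)) | (x >> (-r & 31)); }

int
main (void)
{
  uint32_t x = 0x12345678u, y = 0x9abcdef0u;
  /* XAR semantics: rotate-right of (x ^ y).  A right-rotate by r equals
     a left-rotate by 32 - r, the form the pattern now uses internally.  */
  assert (rotr32 (x ^ y, 9) == rotl32 (x ^ y, 32 - 9));
  /* Rotating by the full element width changes nothing, so the operation
     degenerates to a plain XOR (EOR).  */
  assert (rotr32 (x ^ y, 32) == (x ^ y));
  return 0;
}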
Looks great to me.  Just one very minor nit:

> diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
> index ddd6e466ee3..62c17281ec7 100644
> --- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
> +++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
> @@ -90,6 +90,23 @@ public:
>    }
>  };
>
> +class svxar_impl : public function_base
> +{
> +public:
> +  rtx
> +  expand (function_expander &e) const override
> +  {
> +    /* aarch64_sve2_xar represents this operation with a left-rotate RTX.
> +       Convert the right-rotate amount from the intrinsic to fit this.  */
> +    machine_mode mode = e.vector_mode (0);
> +    HOST_WIDE_INT rot = GET_MODE_UNIT_BITSIZE (mode)
> +			- INTVAL (e.args[2]);
> +    e.args[2]
> +      = aarch64_simd_gen_const_vector_dup (mode, rot);

The split line seems unnecessary.

OK with that change as far as I'm concerned.

Thanks,
Richard

> +    return e.use_exact_insn (code_for_aarch64_sve2_xar (mode));
> +  }
> +};
> +
>  class svcdot_impl : public function_base
>  {
>  public:
> @@ -773,6 +790,6 @@ FUNCTION (svwhilege, while_comparison, (UNSPEC_WHILEGE, UNSPEC_WHILEHS))
>  FUNCTION (svwhilegt, while_comparison, (UNSPEC_WHILEGT, UNSPEC_WHILEHI))
>  FUNCTION (svwhilerw, svwhilerw_svwhilewr_impl, (UNSPEC_WHILERW))
>  FUNCTION (svwhilewr, svwhilerw_svwhilewr_impl, (UNSPEC_WHILEWR))
> -FUNCTION (svxar, CODE_FOR_MODE0 (aarch64_sve2_xar),)
> +FUNCTION (svxar, svxar_impl,)
>
>  } /* end namespace aarch64_sve */
> diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md
> index 5f2697c3179..8047f405a17 100644
> --- a/gcc/config/aarch64/aarch64-sve2.md
> +++ b/gcc/config/aarch64/aarch64-sve2.md
> @@ -1266,18 +1266,28 @@
>  ;; - XAR
>  ;; -------------------------------------------------------------------------
>
> +;; Also allow the Advanced SIMD modes as the SVE2 XAR instruction
> +;; can handle more element sizes than the TARGET_SHA3 one from Advanced SIMD.
> +;; Don't allow the V2DImode use here unless !TARGET_SHA3 as the Advanced SIMD
> +;; version should be preferred when available as it is non-destructive on its
> +;; input.
>  (define_insn "@aarch64_sve2_xar<mode>"
> -  [(set (match_operand:SVE_FULL_I 0 "register_operand")
> -	(rotatert:SVE_FULL_I
> -	  (xor:SVE_FULL_I
> -	    (match_operand:SVE_FULL_I 1 "register_operand")
> -	    (match_operand:SVE_FULL_I 2 "register_operand"))
> -	  (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm")))]
> -  "TARGET_SVE2"
> -  {@ [ cons: =0 , 1  , 2 ; attrs: movprfx ]
> -     [ w        , %0 , w ; *              ] xar\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #%3
> -     [ ?&w      , w  , w ; yes            ] movprfx\t%0, %1\;xar\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #%3
> +  [(set (match_operand:SVE_ASIMD_FULL_I 0 "register_operand" "=w,?&w")
> +	(rotate:SVE_ASIMD_FULL_I
> +	  (xor:SVE_ASIMD_FULL_I
> +	    (match_operand:SVE_ASIMD_FULL_I 1 "register_operand" "%0,w")
> +	    (match_operand:SVE_ASIMD_FULL_I 2 "register_operand" "w,w"))
> +	  (match_operand:SVE_ASIMD_FULL_I 3 "aarch64_simd_lshift_imm")))]
> +  "TARGET_SVE2 && !(<MODE>mode == V2DImode && TARGET_SHA3)"
> +  {
> +    operands[3]
> +      = GEN_INT (GET_MODE_UNIT_BITSIZE (<MODE>mode)
> +		 - INTVAL (unwrap_const_vec_duplicate (operands[3])));
> +    if (which_alternative == 0)
> +      return "xar\t%Z0.<Vetype>, %Z0.<Vetype>, %Z2.<Vetype>, #%3";
> +    return "movprfx\t%Z0, %Z1\;xar\t%Z0.<Vetype>, %Z0.<Vetype>, %Z2.<Vetype>, #%3";
>    }
> +  [(set_attr "movprfx" "*,yes")]
>  )
>
>  ;; -------------------------------------------------------------------------
> diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
> index 0bc98315bb6..8269b0cdcd9 100644
> --- a/gcc/config/aarch64/iterators.md
> +++ b/gcc/config/aarch64/iterators.md
> @@ -446,6 +446,9 @@
>  ;; All fully-packed SVE integer vector modes.
>  (define_mode_iterator SVE_FULL_I [VNx16QI VNx8HI VNx4SI VNx2DI])
>
> +;; All fully-packed SVE integer and Advanced SIMD integer modes.
> +(define_mode_iterator SVE_ASIMD_FULL_I [SVE_FULL_I VDQ_I])
> +
>  ;; All fully-packed SVE floating-point vector modes.
>  (define_mode_iterator SVE_FULL_F [VNx8HF VNx4SF VNx2DF])
>
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_s16.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_s16.c
> index 34351d52718..f69ba3f7b06 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_s16.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_s16.c
> @@ -70,7 +70,11 @@ TEST_UNIFORM_Z (xar_2_s16_untied, svint16_t,
>
>  /*
>  ** xar_16_s16_tied1:
> -**	xar	z0\.h, z0\.h, z1\.h, #16
> +** (
> +**	eor	z0\.d, z1\.d, z0\.d
> +** |
> +**	eor	z0\.d, z0\.d, z1\.d
> +** )
>  **	ret
>  */
>  TEST_UNIFORM_Z (xar_16_s16_tied1, svint16_t,
> @@ -79,7 +83,11 @@ TEST_UNIFORM_Z (xar_16_s16_tied1, svint16_t,
>
>  /*
>  ** xar_16_s16_tied2:
> -**	xar	z0\.h, z0\.h, z1\.h, #16
> +** (
> +**	eor	z0\.d, z1\.d, z0\.d
> +** |
> +**	eor	z0\.d, z0\.d, z1\.d
> +** )
>  **	ret
>  */
>  TEST_UNIFORM_Z (xar_16_s16_tied2, svint16_t,
> @@ -89,11 +97,9 @@ TEST_UNIFORM_Z (xar_16_s16_tied2, svint16_t,
>  /*
>  ** xar_16_s16_untied:
>  ** (
> -**	movprfx	z0, z1
> -**	xar	z0\.h, z0\.h, z2\.h, #16
> +**	eor	z0\.d, z1\.d, z2\.d
>  ** |
> -**	movprfx	z0, z2
> -**	xar	z0\.h, z0\.h, z1\.h, #16
> +**	eor	z0\.d, z2\.d, z1\.d
>  ** )
>  **	ret
>  */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_s32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_s32.c
> index 366a6172807..540f7b875ec 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_s32.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_s32.c
> @@ -70,7 +70,11 @@ TEST_UNIFORM_Z (xar_2_s32_untied, svint32_t,
>
>  /*
>  ** xar_32_s32_tied1:
> -**	xar	z0\.s, z0\.s, z1\.s, #32
> +** (
> +**	eor	z0\.d, z1\.d, z0\.d
> +** |
> +**	eor	z0\.d, z0\.d, z1\.d
> +** )
>  **	ret
>  */
>  TEST_UNIFORM_Z (xar_32_s32_tied1, svint32_t,
> @@ -79,7 +83,11 @@ TEST_UNIFORM_Z (xar_32_s32_tied1, svint32_t,
>
>  /*
>  ** xar_32_s32_tied2:
> -**	xar	z0\.s, z0\.s, z1\.s, #32
> +** (
> +**	eor	z0\.d, z0\.d, z1\.d
> +** |
> +**	eor	z0\.d, z1\.d, z0\.d
> +** )
>  **	ret
>  */
>  TEST_UNIFORM_Z (xar_32_s32_tied2, svint32_t,
> @@ -89,11 +97,9 @@ TEST_UNIFORM_Z (xar_32_s32_tied2, svint32_t,
>  /*
>  ** xar_32_s32_untied:
>  ** (
> -**	movprfx	z0, z1
> -**	xar	z0\.s, z0\.s, z2\.s, #32
> +**	eor	z0\.d, z1\.d, z2\.d
>  ** |
> -**	movprfx	z0, z2
> -**	xar	z0\.s, z0\.s, z1\.s, #32
> +**	eor	z0\.d, z2\.d, z1\.d
>  ** )
>  **	ret
>  */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_s64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_s64.c
> index dedda2ed044..9491dbdb848 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_s64.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_s64.c
> @@ -70,7 +70,11 @@ TEST_UNIFORM_Z (xar_2_s64_untied, svint64_t,
>
>  /*
>  ** xar_64_s64_tied1:
> -**	xar	z0\.d, z0\.d, z1\.d, #64
> +** (
> +**	eor	z0\.d, z1\.d, z0\.d
> +** |
> +**	eor	z0\.d, z0\.d, z1\.d
> +** )
>  **	ret
>  */
>  TEST_UNIFORM_Z (xar_64_s64_tied1, svint64_t,
> @@ -79,7 +83,11 @@ TEST_UNIFORM_Z (xar_64_s64_tied1, svint64_t,
>
>  /*
>  ** xar_64_s64_tied2:
> -**	xar	z0\.d, z0\.d, z1\.d, #64
> +** (
> +**	eor	z0\.d, z1\.d, z0\.d
> +** |
> +**	eor	z0\.d, z0\.d, z1\.d
> +** )
>  **	ret
>  */
>  TEST_UNIFORM_Z (xar_64_s64_tied2, svint64_t,
> @@ -89,11 +97,9 @@ TEST_UNIFORM_Z (xar_64_s64_tied2, svint64_t,
>  /*
>  ** xar_64_s64_untied:
>  ** (
> -**	movprfx	z0, z1
> -**	xar	z0\.d, z0\.d, z2\.d, #64
> +**	eor	z0\.d, z1\.d, z2\.d
>  ** |
> -**	movprfx	z0, z2
> -**	xar	z0\.d, z0\.d, z1\.d, #64
> +**	eor	z0\.d, z2\.d, z1\.d
>  ** )
>  **	ret
>  */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_s8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_s8.c
> index 904352b93da..e62e5bca5ba 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_s8.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_s8.c
> @@ -70,7 +70,11 @@ TEST_UNIFORM_Z (xar_2_s8_untied, svint8_t,
>
>  /*
>  ** xar_8_s8_tied1:
> -**	xar	z0\.b, z0\.b, z1\.b, #8
> +** (
> +**	eor	z0\.d, z1\.d, z0\.d
> +** |
> +**	eor	z0\.d, z0\.d, z1\.d
> +** )
>  **	ret
>  */
>  TEST_UNIFORM_Z (xar_8_s8_tied1, svint8_t,
> @@ -79,7 +83,11 @@ TEST_UNIFORM_Z (xar_8_s8_tied1, svint8_t,
>
>  /*
>  ** xar_8_s8_tied2:
> -**	xar	z0\.b, z0\.b, z1\.b, #8
> +** (
> +**	eor	z0\.d, z1\.d, z0\.d
> +** |
> +**	eor	z0\.d, z0\.d, z1\.d
> +** )
>  **	ret
>  */
>  TEST_UNIFORM_Z (xar_8_s8_tied2, svint8_t,
> @@ -89,11 +97,9 @@ TEST_UNIFORM_Z (xar_8_s8_tied2, svint8_t,
>  /*
>  ** xar_8_s8_untied:
>  ** (
> -**	movprfx	z0, z1
> -**	xar	z0\.b, z0\.b, z2\.b, #8
> +**	eor	z0\.d, z1\.d, z2\.d
>  ** |
> -**	movprfx	z0, z2
> -**	xar	z0\.b, z0\.b, z1\.b, #8
> +**	eor	z0\.d, z2\.d, z1\.d
>  ** )
>  **	ret
>  */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_u16.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_u16.c
> index c7b9665aeed..6269145bc6d 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_u16.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_u16.c
> @@ -70,7 +70,11 @@ TEST_UNIFORM_Z (xar_2_u16_untied, svuint16_t,
>
>  /*
>  ** xar_16_u16_tied1:
> -**	xar	z0\.h, z0\.h, z1\.h, #16
> +** (
> +**	eor	z0\.d, z1\.d, z0\.d
> +** |
> +**	eor	z0\.d, z0\.d, z1\.d
> +** )
>  **	ret
>  */
>  TEST_UNIFORM_Z (xar_16_u16_tied1, svuint16_t,
> @@ -79,7 +83,11 @@ TEST_UNIFORM_Z (xar_16_u16_tied1, svuint16_t,
>
>  /*
>  ** xar_16_u16_tied2:
> -**	xar	z0\.h, z0\.h, z1\.h, #16
> +** (
> +**	eor	z0\.d, z1\.d, z0\.d
> +** |
> +**	eor	z0\.d, z0\.d, z1\.d
> +** )
>  **	ret
>  */
>  TEST_UNIFORM_Z (xar_16_u16_tied2, svuint16_t,
> @@ -89,11 +97,9 @@ TEST_UNIFORM_Z (xar_16_u16_tied2, svuint16_t,
>  /*
>  ** xar_16_u16_untied:
>  ** (
> -**	movprfx	z0, z1
> -**	xar	z0\.h, z0\.h, z2\.h, #16
> +**	eor	z0\.d, z1\.d, z2\.d
>  ** |
> -**	movprfx	z0, z2
> -**	xar	z0\.h, z0\.h, z1\.h, #16
> +**	eor	z0\.d, z2\.d, z1\.d
>  ** )
>  **	ret
>  */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_u32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_u32.c
> index 115ead7701c..99efd14e1ed 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_u32.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_u32.c
> @@ -70,7 +70,11 @@ TEST_UNIFORM_Z (xar_2_u32_untied, svuint32_t,
>
>  /*
>  ** xar_32_u32_tied1:
> -**	xar	z0\.s, z0\.s, z1\.s, #32
> +** (
> +**	eor	z0\.d, z1\.d, z0\.d
> +** |
> +**	eor	z0\.d, z0\.d, z1\.d
> +** )
>  **	ret
>  */
>  TEST_UNIFORM_Z (xar_32_u32_tied1, svuint32_t,
> @@ -79,7 +83,11 @@ TEST_UNIFORM_Z (xar_32_u32_tied1, svuint32_t,
>
>  /*
>  ** xar_32_u32_tied2:
> -**	xar	z0\.s, z0\.s, z1\.s, #32
> +** (
> +**	eor	z0\.d, z1\.d, z0\.d
> +** |
> +**	eor	z0\.d, z0\.d, z1\.d
> +** )
>  **	ret
>  */
>  TEST_UNIFORM_Z (xar_32_u32_tied2, svuint32_t,
> @@ -89,11 +97,9 @@ TEST_UNIFORM_Z (xar_32_u32_tied2, svuint32_t,
>  /*
>  ** xar_32_u32_untied:
>  ** (
> -**	movprfx	z0, z1
> -**	xar	z0\.s, z0\.s, z2\.s, #32
> +**	eor	z0\.d, z1\.d, z2\.d
>  ** |
> -**	movprfx	z0, z2
> -**	xar	z0\.s, z0\.s, z1\.s, #32
> +**	eor	z0\.d, z2\.d, z1\.d
>  ** )
>  **	ret
>  */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_u64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_u64.c
> index 1d0d90e90d6..5c770ffdadb 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_u64.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_u64.c
> @@ -70,7 +70,11 @@ TEST_UNIFORM_Z (xar_2_u64_untied, svuint64_t,
>
>  /*
>  ** xar_64_u64_tied1:
> -**	xar	z0\.d, z0\.d, z1\.d, #64
> +** (
> +**	eor	z0\.d, z1\.d, z0\.d
> +** |
> +**	eor	z0\.d, z0\.d, z1\.d
> +** )
>  **	ret
>  */
>  TEST_UNIFORM_Z (xar_64_u64_tied1, svuint64_t,
> @@ -79,7 +83,11 @@ TEST_UNIFORM_Z (xar_64_u64_tied1, svuint64_t,
>
>  /*
>  ** xar_64_u64_tied2:
> -**	xar	z0\.d, z0\.d, z1\.d, #64
> +** (
> +**	eor	z0\.d, z1\.d, z0\.d
> +** |
> +**	eor	z0\.d, z0\.d, z1\.d
> +** )
>  **	ret
>  */
>  TEST_UNIFORM_Z (xar_64_u64_tied2, svuint64_t,
> @@ -89,11 +97,9 @@ TEST_UNIFORM_Z (xar_64_u64_tied2, svuint64_t,
>  /*
>  ** xar_64_u64_untied:
>  ** (
> -**	movprfx	z0, z1
> -**	xar	z0\.d, z0\.d, z2\.d, #64
> +**	eor	z0\.d, z1\.d, z2\.d
>  ** |
> -**	movprfx	z0, z2
> -**	xar	z0\.d, z0\.d, z1\.d, #64
> +**	eor	z0\.d, z2\.d, z1\.d
>  ** )
>  **	ret
>  */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_u8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_u8.c
> index 3b6161729cb..5ae5323a08a 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_u8.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_u8.c
> @@ -70,7 +70,11 @@ TEST_UNIFORM_Z (xar_2_u8_untied, svuint8_t,
>
>  /*
>  ** xar_8_u8_tied1:
> -**	xar	z0\.b, z0\.b, z1\.b, #8
> +** (
> +**	eor	z0\.d, z1\.d, z0\.d
> +** |
> +**	eor	z0\.d, z0\.d, z1\.d
> +** )
>  **	ret
>  */
>  TEST_UNIFORM_Z (xar_8_u8_tied1, svuint8_t,
> @@ -79,7 +83,11 @@ TEST_UNIFORM_Z (xar_8_u8_tied1, svuint8_t,
>
>  /*
>  ** xar_8_u8_tied2:
> -**	xar	z0\.b, z0\.b, z1\.b, #8
> +** (
> +**	eor	z0\.d, z1\.d, z0\.d
> +** |
> +**	eor	z0\.d, z0\.d, z1\.d
> +** )
>  **	ret
>  */
>  TEST_UNIFORM_Z (xar_8_u8_tied2, svuint8_t,
> @@ -89,11 +97,9 @@ TEST_UNIFORM_Z (xar_8_u8_tied2, svuint8_t,
>  /*
>  ** xar_8_u8_untied:
>  ** (
> -**	movprfx	z0, z1
> -**	xar	z0\.b, z0\.b, z2\.b, #8
> +**	eor	z0\.d, z1\.d, z2\.d
>  ** |
> -**	movprfx	z0, z2
> -**	xar	z0\.b, z0\.b, z1\.b, #8
> +**	eor	z0\.d, z2\.d, z1\.d
>  ** )
>  **	ret
>  */
> diff --git a/gcc/testsuite/gcc.target/aarch64/xar_neon_modes.c b/gcc/testsuite/gcc.target/aarch64/xar_neon_modes.c
> new file mode 100644
> index 00000000000..750fbcfc48a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/xar_neon_modes.c
> @@ -0,0 +1,39 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +#pragma GCC target "+sve2+nosha3"
> +
> +typedef char __attribute__ ((vector_size (16))) v16qi;
> +typedef unsigned short __attribute__ ((vector_size (16))) v8hi;
> +typedef unsigned int __attribute__ ((vector_size (16))) v4si;
> +typedef unsigned long long __attribute__ ((vector_size (16))) v2di;
> +
> +v16qi
> +xar_v16qi (v16qi a, v16qi b) {
> +  v16qi c = a ^ b;
> +  return (c << 2) ^ (c >> 6);
> +}
> +/* { dg-final { scan-assembler {\txar\tz0.b, z[0-9]+.b, z[0-9]+.b, #6} } } */
> +
> +v8hi
> +xar_v8hi (v8hi a, v8hi b) {
> +  v8hi c = a ^ b;
> +  return (c << 13) ^ (c >> 3);
> +}
> +/* { dg-final { scan-assembler {\txar\tz0.h, z[0-9]+.h, z[0-9]+.h, #3} } } */
> +
> +v4si
> +xar_v4si (v4si a, v4si b) {
> +  v4si c = a ^ b;
> +  return (c << 9) ^ (c >> 23);
> +}
> +/* { dg-final { scan-assembler {\txar\tz0.s, z[0-9]+.s, z[0-9]+.s, #23} } } */
> +
> +/* When +sha3 for Advanced SIMD is not available we should still use the
> +   SVE2 form of XAR.  */
> +v2di
> +xar_v2di (v2di a, v2di b) {
> +  v2di c = a ^ b;
> +  return (c << 22) ^ (c >> 42);
> +}
> +/* { dg-final { scan-assembler {\txar\tz0.d, z[0-9]+.d, z[0-9]+.d, #42} } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/xar_v2di_nonsve.c b/gcc/testsuite/gcc.target/aarch64/xar_v2di_nonsve.c
> new file mode 100644
> index 00000000000..b0f1a97222b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/xar_v2di_nonsve.c
> @@ -0,0 +1,16 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +#pragma GCC target "+sve2+sha3"
> +
> +typedef unsigned long long __attribute__ ((vector_size (16))) v2di;
> +
> +/* Both +sve2 and +sha3 have V2DImode XAR instructions, but we should
> +   prefer the Advanced SIMD one when both are available.  */
> +v2di
> +xar_v2di (v2di a, v2di b) {
> +  v2di c = a ^ b;
> +  return (c << 22) ^ (c >> 42);
> +}
> +/* { dg-final { scan-assembler {\txar\tv0.2d, v[0-9]+.2d, v[0-9]+.2d, 42} } } */
> +
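As a usage-level footnote on the adjusted scan patterns: a rough standalone
reproducer of the behaviour the intrinsic tests now expect.  The exact test
sources are not shown in the hunks above, so the calls below are an assumption
based on the ACLE svxar_n_s32 form those tests exercise: a rotate amount equal
to the element width should now fold to a plain EOR, while any other amount
should still emit XAR with the original right-rotate immediate.

/* Hypothetical reproducer, not part of the patch.  */
#pragma GCC target "+sve2"
#include <arm_sve.h>

svint32_t
xar_by_element_width (svint32_t a, svint32_t b)
{
  /* Rotating a 32-bit element by 32 is the identity, so this is just
     (a ^ b) and is expected to emit EOR rather than XAR.  */
  return svxar_n_s32 (a, b, 32);
}

svint32_t
xar_by_nine (svint32_t a, svint32_t b)
{
  /* A genuine rotate amount is still expected to emit XAR,
     e.g. xar z0.s, z0.s, z1.s, #9.  */
  return svxar_n_s32 (a, b, 9);
}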