Thanks very much, updated.

Regards,
Yuliang
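For reference, all of the BSL patterns below hinge on one per-bit select identity. Here is a minimal, self-contained C sketch of it (illustrative only, not part of the patch; the function names are mine):

#include <assert.h>
#include <stdint.h>

/* Reference bitwise select: take bits of MOV where SEL is 1, else DUP.  */
static uint64_t
bsl_ref (uint64_t mov, uint64_t dup, uint64_t sel)
{
  return (mov & sel) | (dup & ~sel);
}

/* The XOR/AND form that combine actually sees, per the pattern comments:
   (sel ? mov : dup) == (((mov ^ dup) & sel) ^ dup).  */
static uint64_t
bsl_xor_form (uint64_t mov, uint64_t dup, uint64_t sel)
{
  return ((mov ^ dup) & sel) ^ dup;
}

int
main (void)
{
  uint64_t v[] = { 0, ~0ULL, 0x0123456789abcdefULL, 0xa5a5a5a5a5a5a5a5ULL };
  for (int i = 0; i < 4; i++)
    for (int j = 0; j < 4; j++)
      for (int k = 0; k < 4; k++)
	assert (bsl_ref (v[i], v[j], v[k]) == bsl_xor_form (v[i], v[j], v[k]));
  return 0;
}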
gcc/ChangeLog:

2019-10-17  Yuliang Wang  <yuliang.w...@arm.com>

	* config/aarch64/aarch64-sve2.md (aarch64_sve2_eor3<mode>)
	(aarch64_sve2_nor<mode>, aarch64_sve2_nand<mode>)
	(aarch64_sve2_bsl<mode>, aarch64_sve2_nbsl<mode>)
	(aarch64_sve2_bsl1n<mode>, aarch64_sve2_bsl2n<mode>):
	New combine patterns.
	* config/aarch64/iterators.md (BSL_DUP): New int iterator for
	the above.
	(bsl_1st, bsl_2nd, bsl_dup, bsl_mov): Attributes for the above.

gcc/testsuite/ChangeLog:

2019-10-17  Yuliang Wang  <yuliang.w...@arm.com>

	* gcc.target/aarch64/sve2/eor3_1.c: New test.
	* gcc.target/aarch64/sve2/nlogic_1.c: As above.
	* gcc.target/aarch64/sve2/nlogic_2.c: As above.
	* gcc.target/aarch64/sve2/bitsel_1.c: As above.
	* gcc.target/aarch64/sve2/bitsel_2.c: As above.
	* gcc.target/aarch64/sve2/bitsel_3.c: As above.
	* gcc.target/aarch64/sve2/bitsel_4.c: As above.


diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md
index b018f5b0bc9b51edf831e2571f0f5a9af2210829..1158a76c49adc329d72a9eb9dbe6bf6f380f92c6 100644
--- a/gcc/config/aarch64/aarch64-sve2.md
+++ b/gcc/config/aarch64/aarch64-sve2.md
@@ -142,3 +142,188 @@
 }
 )
 
+;; Unpredicated 3-way exclusive OR.
+(define_insn "*aarch64_sve2_eor3<mode>"
+  [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w, ?&w")
+        (xor:SVE_I
+          (xor:SVE_I
+            (match_operand:SVE_I 1 "register_operand" "0, w, w, w")
+            (match_operand:SVE_I 2 "register_operand" "w, 0, w, w"))
+          (match_operand:SVE_I 3 "register_operand" "w, w, 0, w")))]
+  "TARGET_SVE2"
+  "@
+   eor3\t%0.d, %0.d, %2.d, %3.d
+   eor3\t%0.d, %0.d, %1.d, %3.d
+   eor3\t%0.d, %0.d, %1.d, %2.d
+   movprfx\t%0, %1\;eor3\t%0.d, %0.d, %2.d, %3.d"
+  [(set_attr "movprfx" "*,*,*,yes")]
+)
+
+;; Use NBSL for vector NOR.
+(define_insn_and_rewrite "*aarch64_sve2_nor<mode>"
+  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
+        (unspec:SVE_I
+          [(match_operand 3)
+           (and:SVE_I
+             (not:SVE_I
+               (match_operand:SVE_I 1 "register_operand" "%0, w"))
+             (not:SVE_I
+               (match_operand:SVE_I 2 "register_operand" "w, w")))]
+          UNSPEC_PRED_X))]
+  "TARGET_SVE2"
+  "@
+   nbsl\t%0.d, %0.d, %2.d, %0.d
+   movprfx\t%0, %1\;nbsl\t%0.d, %0.d, %2.d, %0.d"
+  "&& !CONSTANT_P (operands[3])"
+  {
+    operands[3] = CONSTM1_RTX (<VPRED>mode);
+  }
+  [(set_attr "movprfx" "*,yes")]
+)
+
+;; Use NBSL for vector NAND.
+(define_insn_and_rewrite "*aarch64_sve2_nand<mode>"
+  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
+        (unspec:SVE_I
+          [(match_operand 3)
+           (ior:SVE_I
+             (not:SVE_I
+               (match_operand:SVE_I 1 "register_operand" "%0, w"))
+             (not:SVE_I
+               (match_operand:SVE_I 2 "register_operand" "w, w")))]
+          UNSPEC_PRED_X))]
+  "TARGET_SVE2"
+  "@
+   nbsl\t%0.d, %0.d, %2.d, %2.d
+   movprfx\t%0, %1\;nbsl\t%0.d, %0.d, %2.d, %2.d"
+  "&& !CONSTANT_P (operands[3])"
+  {
+    operands[3] = CONSTM1_RTX (<VPRED>mode);
+  }
+  [(set_attr "movprfx" "*,yes")]
+)
+
+;; Unpredicated bitwise select.
+;; (op3 ? bsl_mov : bsl_dup) == (((bsl_mov ^ bsl_dup) & op3) ^ bsl_dup)
+(define_insn "*aarch64_sve2_bsl<mode>"
+  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
+        (xor:SVE_I
+          (and:SVE_I
+            (xor:SVE_I
+              (match_operand:SVE_I 1 "register_operand" "<bsl_1st>, w")
+              (match_operand:SVE_I 2 "register_operand" "<bsl_2nd>, w"))
+            (match_operand:SVE_I 3 "register_operand" "w, w"))
+          (match_dup BSL_DUP)))]
+  "TARGET_SVE2"
+  "@
+   bsl\t%0.d, %0.d, %<bsl_dup>.d, %3.d
+   movprfx\t%0, %<bsl_mov>\;bsl\t%0.d, %0.d, %<bsl_dup>.d, %3.d"
+  [(set_attr "movprfx" "*,yes")]
+)
+
+;; Unpredicated bitwise inverted select.
+;; (~(op3 ? bsl_mov : bsl_dup)) == (~(((bsl_mov ^ bsl_dup) & op3) ^ bsl_dup))
+(define_insn_and_rewrite "*aarch64_sve2_nbsl<mode>"
+  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
+        (unspec:SVE_I
+          [(match_operand 4)
+           (not:SVE_I
+             (xor:SVE_I
+               (and:SVE_I
+                 (xor:SVE_I
+                   (match_operand:SVE_I 1 "register_operand" "<bsl_1st>, w")
+                   (match_operand:SVE_I 2 "register_operand" "<bsl_2nd>, w"))
+                 (match_operand:SVE_I 3 "register_operand" "w, w"))
+               (match_dup BSL_DUP)))]
+          UNSPEC_PRED_X))]
+  "TARGET_SVE2"
+  "@
+   nbsl\t%0.d, %0.d, %<bsl_dup>.d, %3.d
+   movprfx\t%0, %<bsl_mov>\;nbsl\t%0.d, %0.d, %<bsl_dup>.d, %3.d"
+  "&& !CONSTANT_P (operands[4])"
+  {
+    operands[4] = CONSTM1_RTX (<VPRED>mode);
+  }
+  [(set_attr "movprfx" "*,yes")]
+)
+
+;; Unpredicated bitwise select with inverted first operand.
+;; (op3 ? ~bsl_mov : bsl_dup) == ((~(bsl_mov ^ bsl_dup) & op3) ^ bsl_dup)
+(define_insn_and_rewrite "*aarch64_sve2_bsl1n<mode>"
+  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
+        (xor:SVE_I
+          (and:SVE_I
+            (unspec:SVE_I
+              [(match_operand 4)
+               (not:SVE_I
+                 (xor:SVE_I
+                   (match_operand:SVE_I 1 "register_operand" "<bsl_1st>, w")
+                   (match_operand:SVE_I 2 "register_operand" "<bsl_2nd>, w")))]
+              UNSPEC_PRED_X)
+            (match_operand:SVE_I 3 "register_operand" "w, w"))
+          (match_dup BSL_DUP)))]
+  "TARGET_SVE2"
+  "@
+   bsl1n\t%0.d, %0.d, %<bsl_dup>.d, %3.d
+   movprfx\t%0, %<bsl_mov>\;bsl1n\t%0.d, %0.d, %<bsl_dup>.d, %3.d"
+  "&& !CONSTANT_P (operands[4])"
+  {
+    operands[4] = CONSTM1_RTX (<VPRED>mode);
+  }
+  [(set_attr "movprfx" "*,yes")]
+)
+
+;; Unpredicated bitwise select with inverted second operand.
+;; (bsl_dup ? bsl_mov : ~op3) == ((bsl_dup & bsl_mov) | (~op3 & ~bsl_dup))
+(define_insn_and_rewrite "*aarch64_sve2_bsl2n<mode>"
+  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
+        (ior:SVE_I
+          (and:SVE_I
+            (match_operand:SVE_I 1 "register_operand" "<bsl_1st>, w")
+            (match_operand:SVE_I 2 "register_operand" "<bsl_2nd>, w"))
+          (unspec:SVE_I
+            [(match_operand 4)
+             (and:SVE_I
+               (not:SVE_I
+                 (match_operand:SVE_I 3 "register_operand" "w, w"))
+               (not:SVE_I
+                 (match_dup BSL_DUP)))]
+            UNSPEC_PRED_X)))]
+  "TARGET_SVE2"
+  "@
+   bsl2n\t%0.d, %0.d, %3.d, %<bsl_dup>.d
+   movprfx\t%0, %<bsl_mov>\;bsl2n\t%0.d, %0.d, %3.d, %<bsl_dup>.d"
+  "&& !CONSTANT_P (operands[4])"
+  {
+    operands[4] = CONSTM1_RTX (<VPRED>mode);
+  }
+  [(set_attr "movprfx" "*,yes")]
+)
+
+;; Unpredicated bitwise select with inverted second operand, alternative form.
+;; (bsl_dup ? bsl_mov : ~op3) == ((bsl_dup & bsl_mov) | (~bsl_dup & ~op3))
+(define_insn_and_rewrite "*aarch64_sve2_bsl2n<mode>"
+  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
+        (ior:SVE_I
+          (and:SVE_I
+            (match_operand:SVE_I 1 "register_operand" "<bsl_1st>, w")
+            (match_operand:SVE_I 2 "register_operand" "<bsl_2nd>, w"))
+          (unspec:SVE_I
+            [(match_operand 4)
+             (and:SVE_I
+               (not:SVE_I
+                 (match_dup BSL_DUP))
+               (not:SVE_I
+                 (match_operand:SVE_I 3 "register_operand" "w, w")))]
+            UNSPEC_PRED_X)))]
+  "TARGET_SVE2"
+  "@
+   bsl2n\t%0.d, %0.d, %3.d, %<bsl_dup>.d
+   movprfx\t%0, %<bsl_mov>\;bsl2n\t%0.d, %0.d, %3.d, %<bsl_dup>.d"
+  "&& !CONSTANT_P (operands[4])"
+  {
+    operands[4] = CONSTM1_RTX (<VPRED>mode);
+  }
+  [(set_attr "movprfx" "*,yes")]
+)
+
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 1e321af710bfe80606eedee7e0d191f36c70355b..f879fadb007a23749a523edbe7fe247dee33fa94 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -1611,6 +1611,8 @@
 
 (define_int_iterator SHRNT [UNSPEC_SHRNT UNSPEC_RSHRNT])
 
+(define_int_iterator BSL_DUP [1 2])
+
 (define_int_iterator DOTPROD [UNSPEC_SDOT UNSPEC_UDOT])
 
 (define_int_iterator ADDSUBHN [UNSPEC_ADDHN UNSPEC_RADDHN
@@ -1976,6 +1978,18 @@
                               (UNSPEC_RADDHN2 "add")
                               (UNSPEC_RSUBHN2 "sub")])
 
+;; BSL variants: first commutative operand.
+(define_int_attr bsl_1st [(1 "w") (2 "0")])
+
+;; BSL variants: second commutative operand.
+(define_int_attr bsl_2nd [(1 "0") (2 "w")])
+
+;; BSL variants: duplicated input operand.
+(define_int_attr bsl_dup [(1 "1") (2 "2")])
+
+;; BSL variants: operand which requires preserving via movprfx.
+(define_int_attr bsl_mov [(1 "2") (2 "1")])
+
 (define_int_attr offsetlr [(UNSPEC_SSLI "") (UNSPEC_USLI "")
                            (UNSPEC_SSRI "offset_") (UNSPEC_USRI "offset_")])
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/bitsel_1.c b/gcc/testsuite/gcc.target/aarch64/sve2/bitsel_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..5c58ff54231d88a4ebf0a91fe4fac97079c8d992
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/bitsel_1.c
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */
+
+#include <stdint.h>
+
+#ifndef OP
+#define OP(x,y,z) (((x) & (z)) | ((y) & ~(z)))
+#endif
+
+#define TYPE(N) int##N##_t
+
+#define TEMPLATE(SIZE) \
+void __attribute__ ((noinline, noclone)) \
+f_##SIZE##_##OP \
+  (TYPE(SIZE) *restrict a, TYPE(SIZE) *restrict b, \
+   TYPE(SIZE) *restrict c, TYPE(SIZE) *restrict d, int n) \
+{ \
+  for (int i = 0; i < n; i++) \
+    a[i] = OP (b[i], c[i], d[i]); \
+}
+
+TEMPLATE (8);
+TEMPLATE (16);
+TEMPLATE (32);
+TEMPLATE (64);
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
+
+/* { dg-final { scan-assembler-not {\teor\tz[0-9]+\.[bhsd]} } } */
+/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.[bhsd]} } } */
+
+/* { dg-final { scan-assembler-times {\tbsl\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */
+
"vectorized 1 loops in function" 4 "vect" } } */ + +/* { dg-final { scan-assembler-not {\teor\tz[0-9]+\.[bhsd]} } } */ +/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.[bhsd]} } } */ +/* { dg-final { scan-assembler-not {\tnot\tz[0-9]+\.[bhsd]} } } */ + +/* { dg-final { scan-assembler-times {\tnbsl\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */ + diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/bitsel_3.c b/gcc/testsuite/gcc.target/aarch64/sve2/bitsel_3.c new file mode 100644 index 0000000000000000000000000000000000000000..93995bb8bade89cd821ed85153d13e96bd4422a5 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/bitsel_3.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */ + +#define OP(x,y,z) ((~(x) & (z)) | ((y) & ~(z))) + +#include "bitsel_1.c" + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */ + +/* { dg-final { scan-assembler-not {\teor\tz[0-9]+\.[bhsd]} } } */ +/* { dg-final { scan-assembler-not {\tbic\tz[0-9]+\.[bhsd]} } } */ + +/* { dg-final { scan-assembler-times {\tbsl1n\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */ + diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/bitsel_4.c b/gcc/testsuite/gcc.target/aarch64/sve2/bitsel_4.c new file mode 100644 index 0000000000000000000000000000000000000000..7ccec619b4d1e8de366c0b0c53879a89a00c2c49 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/bitsel_4.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */ + +#define OP(x,y,z) (((x) & (z)) | (~(y) & ~(z))) + +#include "bitsel_1.c" + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */ + +/* { dg-final { scan-assembler-not {\torr\tz[0-9]+\.[bhsd]} } } */ +/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.[bhsd]} } } */ +/* { dg-final { scan-assembler-not {\tnot\tz[0-9]+\.[bhsd]} } } */ + +/* { dg-final { scan-assembler-times {\tbsl2n\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */ + diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/eor3_1.c b/gcc/testsuite/gcc.target/aarch64/sve2/eor3_1.c new file mode 100644 index 0000000000000000000000000000000000000000..551802a0c9f007273ddc68cc4ce77defe700d76e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/eor3_1.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */ + +#define OP(x,y,z) ((x) ^ (y) ^ (z)) + +#include "bitsel_1.c" + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */ + +/* { dg-final { scan-assembler-not {\teor\tz[0-9]+\.[bhsd]} } } */ + +/* { dg-final { scan-assembler-times {\teor3\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */ + diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/nlogic_1.c b/gcc/testsuite/gcc.target/aarch64/sve2/nlogic_1.c new file mode 100644 index 0000000000000000000000000000000000000000..ef0e266bd93bb3d3b5af204438ad8ef35faa5675 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/nlogic_1.c @@ -0,0 +1,34 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */ + +#include <stdint.h> + +#ifndef OP +#define OP(x,y) (~((x) | (y))) +#endif + +#define TYPE(N) int##N##_t + +#define TEMPLATE(SIZE) \ +void __attribute__ ((noinline, noclone)) \ +f_##SIZE##_##OP \ + (TYPE(SIZE) *restrict a, TYPE(SIZE) *restrict b, \ + TYPE(SIZE) *restrict c, int n) \ +{ \ + for 
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/nlogic_1.c b/gcc/testsuite/gcc.target/aarch64/sve2/nlogic_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..ef0e266bd93bb3d3b5af204438ad8ef35faa5675
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/nlogic_1.c
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */
+
+#include <stdint.h>
+
+#ifndef OP
+#define OP(x,y) (~((x) | (y)))
+#endif
+
+#define TYPE(N) int##N##_t
+
+#define TEMPLATE(SIZE) \
+void __attribute__ ((noinline, noclone)) \
+f_##SIZE##_##OP \
+  (TYPE(SIZE) *restrict a, TYPE(SIZE) *restrict b, \
+   TYPE(SIZE) *restrict c, int n) \
+{ \
+  for (int i = 0; i < n; i++) \
+    a[i] = OP (b[i], c[i]); \
+}
+
+TEMPLATE (8);
+TEMPLATE (16);
+TEMPLATE (32);
+TEMPLATE (64);
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
+
+/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.[bhsd]} } } */
+/* { dg-final { scan-assembler-not {\torr\tz[0-9]+\.[bhsd]} } } */
+/* { dg-final { scan-assembler-not {\tnot\tz[0-9]+\.[bhsd]} } } */
+
+/* { dg-final { scan-assembler-times {\tnbsl\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */
+
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/nlogic_2.c b/gcc/testsuite/gcc.target/aarch64/sve2/nlogic_2.c
new file mode 100644
index 0000000000000000000000000000000000000000..da8c86161625ff51814c0d8d4e5d51035ad1b1f6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/nlogic_2.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */
+
+#define OP(x,y) (~((x) & (y)))
+
+#include "nlogic_1.c"
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
+
+/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.[bhsd]} } } */
+/* { dg-final { scan-assembler-not {\torr\tz[0-9]+\.[bhsd]} } } */
+/* { dg-final { scan-assembler-not {\tnot\tz[0-9]+\.[bhsd]} } } */
+
+/* { dg-final { scan-assembler-times {\tnbsl\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */
+
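As a quick sanity check, the identities the NOR/NAND and BSL1N/BSL2N patterns rely on can be brute-forced in plain C. The following harness is illustrative only, not part of the patch; sel and check are my own names:

#include <assert.h>
#include <stdint.h>

/* Per-bit select: bits of A where MASK is 1, bits of B elsewhere.  */
static uint64_t
sel (uint64_t a, uint64_t b, uint64_t mask)
{
  return (a & mask) | (b & ~mask);
}

static void
check (uint64_t x, uint64_t y, uint64_t z)
{
  /* NOR via NBSL: selecting with mask == x yields x | y, then negate.  */
  assert (~(x | y) == ~sel (x, y, x));
  /* NAND via NBSL: selecting with mask == y yields x & y, then negate.  */
  assert (~(x & y) == ~sel (x, y, y));
  /* BSL1N: invert the first select input (comment form from the patch).  */
  assert (sel (~x, y, z) == ((~(x ^ y) & z) ^ y));
  /* BSL2N: invert the second select input; the select mask is y.  */
  assert (sel (x, ~z, y) == ((y & x) | (~z & ~y)));
}

int
main (void)
{
  uint64_t v[] = { 0, ~0ULL, 0x0123456789abcdefULL, 0xf0f0f0f0f0f0f0f0ULL };
  for (int i = 0; i < 4; i++)
    for (int j = 0; j < 4; j++)
      for (int k = 0; k < 4; k++)
	check (v[i], v[j], v[k]);
  return 0;
}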
-----Original Message-----
From: Richard Sandiford <richard.sandif...@arm.com>
Sent: 17 October 2019 11:28
To: Yuliang Wang <yuliang.w...@arm.com>
Cc: gcc-patches@gcc.gnu.org; nd <n...@arm.com>
Subject: Re: [AArch64][SVE2] Support for EOR3 and variants of BSL

Yuliang Wang <yuliang.w...@arm.com> writes:
> Hi Richard,
>
> Thanks for the suggestions, updated.
>
> Regards,
> Yuliang
>
>
> gcc/ChangeLog:
>
> 2019-10-17  Yuliang Wang  <yuliang.w...@arm.com>
>
> 	* config/aarch64/aarch64-sve2.md (aarch64_sve2_eor3<mode>)
> 	(aarch64_sve2_nor<mode>, aarch64_sve2_nand<mode>)
> 	(aarch64_sve2_bsl<mode>, aarch64_sve2_nbsl<mode>)
> 	(aarch64_sve2_bsl1n<mode>, aarch64_sve2_bsl2n<mode>):
> 	New combine patterns.
> 	* config/aarch64/iterators.md (BSL_DUP): New int iterator for the above.
> 	(bsl_1st, bsl_2nd, bsl_dup, bsl_mov): Attributes for the above.
> 	* config/aarch64/aarch64.h (AARCH64_ISA_SVE2_SHA3): New ISA flag macro.
> 	(TARGET_SVE2_SHA3): New CPU target.
>
> gcc/testsuite/ChangeLog:
>
> 2019-10-17  Yuliang Wang  <yuliang.w...@arm.com>
>
> 	* gcc.target/aarch64/sve2/eor3_1.c: New test.
> 	* gcc.target/aarch64/sve2/eor3_2.c: As above.
> 	* gcc.target/aarch64/sve2/nlogic_1.c: As above.
> 	* gcc.target/aarch64/sve2/nlogic_2.c: As above.
> 	* gcc.target/aarch64/sve2/bitsel_1.c: As above.
> 	* gcc.target/aarch64/sve2/bitsel_2.c: As above.
> 	* gcc.target/aarch64/sve2/bitsel_3.c: As above.
> 	* gcc.target/aarch64/sve2/bitsel_4.c: As above.
>
>
> diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md
> index b018f5b0bc9b51edf831e2571f0f5a9af2210829..08d5214a3debb9e9a0796da0af3009ed3ff55774 100644
> --- a/gcc/config/aarch64/aarch64-sve2.md
> +++ b/gcc/config/aarch64/aarch64-sve2.md
> @@ -142,3 +142,189 @@
>  }
>  )
>
> +;; Unpredicated 3-way exclusive OR.
> +(define_insn "*aarch64_sve2_eor3<mode>"
> +  [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w, ?&w")
> +        (xor:SVE_I
> +          (xor:SVE_I
> +            (match_operand:SVE_I 1 "register_operand" "0, w, w, w")
> +            (match_operand:SVE_I 2 "register_operand" "w, 0, w, w"))
> +          (match_operand:SVE_I 3 "register_operand" "w, w, 0, w")))]
> +  "TARGET_SVE2_SHA3"

EOR3 is part of base SVE2, it doesn't require the SHA3 extension.

> +;; Unpredicated bitwise select.
> +;; N.B. non-canonical equivalent form due to expand pass.

Think it would be better to drop this line (and similarly for the
patterns below).  The form isn't non-canonical -- there just isn't a
defined canonical form here. :-)  It is the expected form as things
stand.

> +;; (op3 ? bsl_mov : bsl_dup) == (((bsl_mov ^ bsl_dup) & op3) ^ bsl_dup)
> +(define_insn "*aarch64_sve2_bsl<mode>"
> +  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
> +        (xor:SVE_I
> +          (and:SVE_I
> +            (xor:SVE_I
> +              (match_operand:SVE_I 1 "register_operand" "<bsl_1st>, w")
> +              (match_operand:SVE_I 2 "register_operand" "<bsl_2nd>, w"))
> +            (match_operand:SVE_I 3 "register_operand" "w, w"))
> +          (match_dup BSL_DUP)))]
> +  "TARGET_SVE2"
> +  "@
> +   bsl\t%0.d, %0.d, %<bsl_dup>.d, %3.d
> +   movprfx\t%0, %<bsl_mov>\;bsl\t%0.d, %0.d, %<bsl_dup>.d, %3.d"
> +  [(set_attr "movprfx" "*,yes")]
> +)
> +
> +;; Unpredicated bitwise inverted select.
> +;; N.B. non-canonical equivalent form.
> +;; (~(op3 ? bsl_mov : bsl_dup)) == (~(((bsl_mov ^ bsl_dup) & op3) ^ bsl_dup))
> +(define_insn_and_rewrite "*aarch64_sve2_nbsl<mode>"
> +  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
> +        (unspec:SVE_I
> +          [(match_operand 4)
> +           (not:SVE_I
> +             (xor:SVE_I
> +               (and:SVE_I
> +                 (xor:SVE_I
> +                   (match_operand:SVE_I 1 "register_operand" "<bsl_1st>, w")
> +                   (match_operand:SVE_I 2 "register_operand" "<bsl_2nd>, w"))
> +                 (match_operand:SVE_I 3 "register_operand" "w, w"))
> +               (match_dup BSL_DUP)))]
> +          UNSPEC_PRED_X))]
> +  "TARGET_SVE2"
> +  "@
> +   nbsl\t%0.d, %0.d, %<bsl_dup>.d, %3.d
> +   movprfx\t%0, %<bsl_mov>\;nbsl\t%0.d, %0.d, %<bsl_dup>.d, %3.d"
> +  "&& !CONSTANT_P (operands[4])"
> +  {
> +    operands[4] = CONSTM1_RTX (<VPRED>mode);
> +  }
> +  [(set_attr "movprfx" "*,yes")]
> +)
> +
> +;; Unpredicated bitwise select with inverted first operand.
> +;; N.B. non-canonical equivalent form.
> +;; (op3 ? ~bsl_mov : bsl_dup) == (((~bsl_mov ^ bsl_dup) & op3) ^ bsl_dup)

That's true, but I think:

;; (op3 ? ~bsl_mov : bsl_dup) == ((~(bsl_mov ^ bsl_dup) & op3) ^ bsl_dup)

is clearer, to match the rtl.

> +(define_insn_and_rewrite "*aarch64_sve2_bsl1n<mode>"
> +  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
> +        (xor:SVE_I
> +          (and:SVE_I
> +            (unspec:SVE_I
> +              [(match_operand 4)
> +               (not:SVE_I
> +                 (xor:SVE_I
> +                   (match_operand:SVE_I 1 "register_operand" "<bsl_1st>, w")
> +                   (match_operand:SVE_I 2 "register_operand" "<bsl_2nd>, w")))]
> +              UNSPEC_PRED_X)
> +            (match_operand:SVE_I 3 "register_operand" "w, w"))
> +          (match_dup BSL_DUP)))]
> +  "TARGET_SVE2"
> +  "@
> +   bsl1n\t%0.d, %0.d, %<bsl_dup>.d, %3.d
> +   movprfx\t%0, %<bsl_mov>\;bsl1n\t%0.d, %0.d, %<bsl_dup>.d, %3.d"
> +  "&& !CONSTANT_P (operands[4])"
> +  {
> +    operands[4] = CONSTM1_RTX (<VPRED>mode);
> +  }
> +  [(set_attr "movprfx" "*,yes")]
> +)
> +
> +;; Unpredicated bitwise select with inverted second operand.
> +(define_insn_and_rewrite "*aarch64_sve2_bsl2n<mode>"
Would be good to have a comment here too:

;; (bsl_dup ? bsl_mov : ~op3) == ((bsl_dup & bsl_mov) | (~op3 & ~bsl_dup))

> +  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
> +        (ior:SVE_I
> +          (and:SVE_I
> +            (match_operand:SVE_I 1 "register_operand" "<bsl_1st>, w")
> +            (match_operand:SVE_I 2 "register_operand" "<bsl_2nd>, w"))
> +          (unspec:SVE_I
> +            [(match_operand 4)
> +             (and:SVE_I
> +               (not:SVE_I
> +                 (match_operand:SVE_I 3 "register_operand" "w, w"))
> +               (not:SVE_I
> +                 (match_dup BSL_DUP)))]
> +            UNSPEC_PRED_X)))]
> +  "TARGET_SVE2"
> +  "@
> +   bsl2n\t%0.d, %0.d, %3.d, %<bsl_dup>.d
> +   movprfx\t%0, %<bsl_mov>\;bsl2n\t%0.d, %0.d, %3.d, %<bsl_dup>.d"
> +  "&& !CONSTANT_P (operands[4])"
> +  {
> +    operands[4] = CONSTM1_RTX (<VPRED>mode);
> +  }
> +  [(set_attr "movprfx" "*,yes")]
> +)
> +
> +;; Unpredicated bitwise select with inverted second operand, alternative form.

;; (bsl_dup ? bsl_mov : ~op3) == ((bsl_dup & bsl_mov) | (~bsl_dup & ~op3))

> +(define_insn_and_rewrite "*aarch64_sve2_bsl2n<mode>"
> +  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
> +        (ior:SVE_I
> +          (and:SVE_I
> +            (match_operand:SVE_I 1 "register_operand" "<bsl_1st>, w")
> +            (match_operand:SVE_I 2 "register_operand" "<bsl_2nd>, w"))
> +          (unspec:SVE_I
> +            [(match_operand 4)
> +             (and:SVE_I
> +               (not:SVE_I
> +                 (match_dup BSL_DUP))
> +               (not:SVE_I
> +                 (match_operand:SVE_I 3 "register_operand" "w, w")))]
> +            UNSPEC_PRED_X)))]
> +  "TARGET_SVE2"
> +  "@
> +   bsl2n\t%0.d, %0.d, %3.d, %<bsl_dup>.d
> +   movprfx\t%0, %<bsl_mov>\;bsl2n\t%0.d, %0.d, %3.d, %<bsl_dup>.d"
> +  "&& !CONSTANT_P (operands[4])"
> +  {
> +    operands[4] = CONSTM1_RTX (<VPRED>mode);
> +  }
> +  [(set_attr "movprfx" "*,yes")]
> +)
> +
> diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
> index abd14a2f92c06828adfc6d2e2e81b63a6163d3a3..cad401ceb2419b6a0a64f2396c8e7d5b9105fb22 100644
> --- a/gcc/config/aarch64/aarch64.h
> +++ b/gcc/config/aarch64/aarch64.h
> @@ -236,6 +236,7 @@ extern unsigned aarch64_architecture_version;
>  #define AARCH64_ISA_F16            (aarch64_isa_flags & AARCH64_FL_F16)
>  #define AARCH64_ISA_SVE            (aarch64_isa_flags & AARCH64_FL_SVE)
>  #define AARCH64_ISA_SVE2           (aarch64_isa_flags & AARCH64_FL_SVE2)
> +#define AARCH64_ISA_SVE2_SHA3      (aarch64_isa_flags & AARCH64_FL_SVE2_SHA3)
>  #define AARCH64_ISA_V8_3           (aarch64_isa_flags & AARCH64_FL_V8_3)
>  #define AARCH64_ISA_DOTPROD        (aarch64_isa_flags & AARCH64_FL_DOTPROD)
>  #define AARCH64_ISA_AES            (aarch64_isa_flags & AARCH64_FL_AES)
> @@ -285,6 +286,9 @@ extern unsigned aarch64_architecture_version;
>  /* SVE2 instructions, enabled through +sve2.  */
>  #define TARGET_SVE2 (AARCH64_ISA_SVE2)
>
> +/* SVE2 SHA3 instructions, enabled through +sve2-sha3.  */
> +#define TARGET_SVE2_SHA3 (TARGET_SVE2 && AARCH64_ISA_SVE2_SHA3)
> +
>  /* ARMv8.3-A features.  */
>  #define TARGET_ARMV8_3 (AARCH64_ISA_V8_3)

With the above change, these macros aren't needed.

Thanks,
Richard