Soumya AR <soum...@nvidia.com> writes:
> @@ -1815,6 +1849,42 @@
>    }
>  )
>  
> +(define_insn "*aarch64_sve2_bsl2n_unpred<mode>"
> +  [(set (match_operand:VDQ_I 0 "register_operand")
> +     (ior:VDQ_I
> +       (and:VDQ_I
> +         (match_operand:VDQ_I 1 "register_operand")
> +         (match_operand:VDQ_I 2 "register_operand"))
> +           (and:VDQ_I
> +             (not:VDQ_I
> +               (match_operand:VDQ_I 3 "register_operand"))
> +             (not:VDQ_I
> +               (match_dup BSL_DUP)))))]

The second "and" should be indented by the same amount as the first "and":

(define_insn "*aarch64_sve2_bsl2n_unpred<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand")
        (ior:VDQ_I
          (and:VDQ_I
            (match_operand:VDQ_I 1 "register_operand")
            (match_operand:VDQ_I 2 "register_operand"))
          (and:VDQ_I
            (not:VDQ_I (match_operand:VDQ_I 3 "register_operand"))
            (not:VDQ_I (match_dup BSL_DUP)))))]

> +  "TARGET_SVE2"
> +  {@ [ cons: =0 , 1         , 2         , 3 ; attrs: movprfx ]
> +     [ w        , <bsl_1st> , <bsl_2nd> , w ; *              ] bsl2n\t%Z0.d, %Z0.d, %Z3.d, %Z<bsl_dup>.d
> +     [ ?&w      , w         , w         , w ; yes            ] movprfx\t%Z0, %Z<bsl_mov>\;bsl2n\t%Z0.d, %Z0.d, %Z3.d, %Z<bsl_dup>.d
> +  }
> +)
> +
> +(define_insn "*aarch64_sve2_bsl2n_unpred<mode>"
> +  [(set (match_operand:VDQ_I 0 "register_operand")
> +     (ior:VDQ_I
> +       (and:VDQ_I
> +         (match_operand:VDQ_I 1 "register_operand")
> +         (match_operand:VDQ_I 2 "register_operand"))
> +           (and:VDQ_I
> +             (not:VDQ_I
> +               (match_dup BSL_DUP))
> +             (not:VDQ_I
> +               (match_operand:VDQ_I 3 "register_operand")))))]

Similarly here: the second "and" should be indented by the same amount as
the first "and".

OK with those changes, thanks.

Richard

> +  "TARGET_SVE2"
> +  {@ [ cons: =0 , 1         , 2         , 3 ; attrs: movprfx ]
> +     [ w        , <bsl_1st> , <bsl_2nd> , w ; *              ] bsl2n\t%Z0.d, %Z0.d, %Z3.d, %Z<bsl_dup>.d
> +     [ ?&w      , w         , w         , w ; yes            ] movprfx\t%Z0, %Z<bsl_mov>\;bsl2n\t%Z0.d, %Z0.d, %Z3.d, %Z<bsl_dup>.d
> +  }
> +)
> +
>  ;; -------------------------------------------------------------------------
>  ;; ---- [INT] Shift-and-accumulate operations
>  ;; -------------------------------------------------------------------------
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/bitsel.c b/gcc/testsuite/gcc.target/aarch64/sve/bitsel.c
> new file mode 100644
> index 00000000000..635bfefc17c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/bitsel.c
> @@ -0,0 +1,35 @@
> +/* { dg-options "-O2 -mcpu=neoverse-v2 --param aarch64-autovec-preference=asimd-only" } */
> +
> +#include <stdint.h>
> +
> +#define OPNBSL(x,y,z) (~(((x) & (z)) | ((y) & ~(z))))
> +#define OPBSL1N(x,y,z) ((~(x) & (z)) | ((y) & ~(z)))
> +#define OPBSL2N(x,y,z) (((x) & (z)) | (~(y) & ~(z)))
> +
> +#define N 1024
> +
> +#define TYPE(N) int##N##_t
> +
> +#define TEST(SIZE, OP, SUFFIX)                                  \
> +void __attribute__ ((noinline, noclone))                        \
> +f_##SIZE##_##SUFFIX                                             \
> +  (TYPE(SIZE) *restrict a, TYPE(SIZE) *restrict b,              \
> +   TYPE(SIZE) *restrict c, TYPE(SIZE) *restrict d)              \
> +{                                                               \
> +  for (int i = 0; i < N; i++)                                   \
> +    a[i] = OP (b[i], c[i], d[i]);                               \
> +}
> +
> +#define TEST_ALL(SIZE)                                          \
> +  TEST(SIZE, OPNBSL, nbsl)                                      \
> +  TEST(SIZE, OPBSL1N, bsl1n)                                    \
> +  TEST(SIZE, OPBSL2N, bsl2n)
> +
> +TEST_ALL(8);
> +TEST_ALL(16);
> +TEST_ALL(32);
> +TEST_ALL(64);
> +
> +/* { dg-final { scan-assembler-times {\tnbsl\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */
> +/* { dg-final { scan-assembler-times {\tbsl1n\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */
> +/* { dg-final { scan-assembler-times {\tbsl2n\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */
> \ No newline at end of file

Reply via email to