On Wed, 29 Sep 2021, Tamar Christina wrote:

> Hi All,
> 
> This optimizes the case where a mask Y which fulfills ~Y + 1 == pow2 is used
> to clear some bits and the result is then compared against 0, into one without
> the masking and a compare against a different immediate.
> 
> We can do this for all unsigned compares and for signed we can do it for
> comparisons of EQ and NE:
> 
> (x & (~255)) == 0 becomes x <= 255, which leaves it to the target to
> optimally deal with the comparison.
> 
> This transformation has to be done in the mid-end because in RTL you don't
> have the signs of the comparison operands, and if the target needs an
> immediate this should be floated outside of the loop.
> 
> The RTL loop invariant hoisting is done before split1.
> 
> i.e.
> 
> void fun1(int32_t *x, int n)
> {
>     for (int i = 0; i < (n & -16); i++)
>       x[i] = (x[i]&(~255)) == 0;
> }
> 
> now generates:
> 
> .L3:
>         ldr     q0, [x0]
>         cmhs    v0.4s, v2.4s, v0.4s
>         and     v0.16b, v1.16b, v0.16b
>         str     q0, [x0], 16
>         cmp     x0, x1
>         bne     .L3
> 
> and floats the immediate out of the loop.
> 
> instead of:
> 
> .L3:
>         ldr     q0, [x0]
>         bic     v0.4s, #255
>         cmeq    v0.4s, v0.4s, #0
>         and     v0.16b, v1.16b, v0.16b
>         str     q0, [x0], 16
>         cmp     x0, x1
>         bne     .L3
> 
> Bootstrapped and regtested on aarch64-none-linux-gnu and
> x86_64-pc-linux-gnu with no issues.
> 
> Ok for master?
> 
> Thanks,
> Tamar
> 
> gcc/ChangeLog:
> 
>       * match.pd: New bitmask compare pattern.
> 
> gcc/testsuite/ChangeLog:
> 
>       * gcc.dg/bic-bitmask-10.c: New test.
>       * gcc.dg/bic-bitmask-11.c: New test.
>       * gcc.dg/bic-bitmask-12.c: New test.
>       * gcc.dg/bic-bitmask-2.c: New test.
>       * gcc.dg/bic-bitmask-3.c: New test.
>       * gcc.dg/bic-bitmask-4.c: New test.
>       * gcc.dg/bic-bitmask-5.c: New test.
>       * gcc.dg/bic-bitmask-6.c: New test.
>       * gcc.dg/bic-bitmask-7.c: New test.
>       * gcc.dg/bic-bitmask-8.c: New test.
>       * gcc.dg/bic-bitmask-9.c: New test.
>       * gcc.dg/bic-bitmask.h: New test.
>       * gcc.target/aarch64/bic-bitmask-1.c: New test.
> 
> --- inline copy of patch -- 
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 
> 0fcfd0ea62c043dc217d0d560ce5b7e569b70e7d..df9212cb27d172856b9d43b0875262f96e8993c4
>  100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -4288,6 +4288,56 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>      (if (ic == ncmp)
>       (ncmp @0 @1))))))
>  
> +/* Transform comparisons of the form (X & Y) CMP 0 to X CMP2 Z
> +   where ~Y + 1 == pow2 and Z = ~Y.  */
> +(for cmp (simple_comparison)
> + (simplify
> +  (cmp (bit_and:c @0 VECTOR_CST@1) integer_zerop)

Why not for INTEGER_CST as well?  We do have a related folding (only
for INTEGER_CST) that does

/* A & (2**N - 1) <= 2**K - 1 -> A & (2**N - 2**K) == 0
   A & (2**N - 1) >  2**K - 1 -> A & (2**N - 2**K) != 0

which could be extended for integer vectors.  That said, can you please
place the pattern next to the above?

Why does the transform only work for uniform vector constants?  (I see
that the implementation becomes simpler, but then you should also handle
the INTEGER_CST case at least)

> +   (if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (@1))
> +     && uniform_vector_p (@1))
> +    (with { tree elt = vector_cst_elt (@1, 0); }
> +     (switch
> +      (if (TYPE_UNSIGNED (TREE_TYPE (@1)) && tree_fits_uhwi_p (elt))

avoid tree_fits_uhwi_p and use wide_int here

> +     (with { unsigned HOST_WIDE_INT diff = tree_to_uhwi (elt);
> +             tree tdiff = wide_int_to_tree (TREE_TYPE (elt), (~diff) + 1);
> +             tree newval = wide_int_to_tree (TREE_TYPE (elt), ~diff);
> +             tree newmask = build_uniform_cst (TREE_TYPE (@1), newval); }
> +      (if (integer_pow2p (tdiff))

You don't seem to use 'tdiff' so please do this check in wide_int

> +       (switch
> +        /* ((mask & x) < 0) -> 0.  */
> +        (if (cmp == LT_EXPR)
> +         { build_zero_cst (TREE_TYPE (@1)); })
> +        /* ((mask & x) <= 0) -> x < mask.  */
> +        (if (cmp == LE_EXPR)
> +         (lt @0 { newmask; }))
> +        /* ((mask & x) == 0) -> x < mask.  */
> +        (if (cmp == EQ_EXPR)
> +         (le @0 { newmask; }))
> +        /* ((mask & x) != 0) -> x > mask.  */
> +        (if (cmp == NE_EXPR)
> +         (gt @0 { newmask; }))
> +        /* ((mask & x) >= 0) -> x <= mask.  */
> +        (if (cmp == GE_EXPR)
> +         (le @0 { newmask; }))
> +         /* ((mask & x) > 0) -> x < mask.  */
> +        (if (cmp == GT_EXPR)
> +         (lt @0 { newmask; }))))))

you can avoid this switch with a lock-step '(for' that maps 'cmp'
to the result comparison code (for simplicity you can either keep
the LT_EXPR special-case or transform to an always true condition
which will be simplified).

> +      (if (!TYPE_UNSIGNED (TREE_TYPE (@1)) && tree_fits_shwi_p (elt))
> +     (with { unsigned HOST_WIDE_INT diff = tree_to_shwi (elt);
> +             tree ustype = unsigned_type_for (TREE_TYPE (elt));
> +             tree uvtype = unsigned_type_for (TREE_TYPE (@1));
> +             tree tdiff = wide_int_to_tree (ustype, (~diff) + 1);
> +             tree udiff = wide_int_to_tree (ustype, ~diff);
> +             tree cst = build_uniform_cst (uvtype, udiff); }
> +      (if (integer_pow2p (tdiff))
> +       (switch
> +         /* ((mask & x) == 0) -> x < mask.  */
> +         (if (cmp == EQ_EXPR)
> +          (le (convert:uvtype @0) { cst; }))
> +         /* ((mask & x) != 0) -> x > mask.  */
> +         (if (cmp == NE_EXPR)
> +          (gt (convert:uvtype @0) { cst; })))))))))))
> +
>  /* Transform comparisons of the form X - Y CMP 0 to X CMP Y.
>     ??? The transformation is valid for the other operators if overflow
>     is undefined for the type, but performing it here badly interacts
> diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-10.c 
> b/gcc/testsuite/gcc.dg/bic-bitmask-10.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..76a22a2313137a2a75dd711c2c15c2d3a34e15aa
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/bic-bitmask-10.c
> @@ -0,0 +1,26 @@
> +/* { dg-do run } */
> +/* { dg-options "-O3 -save-temps -fdump-tree-dce" } */
> +
> +#include <stdint.h>
> +
> +__attribute__((noinline, noipa))
> +void fun1(int32_t *x, int n)
> +{
> +    for (int i = 0; i < (n & -16); i++)
> +      x[i] = (x[i]&(~255)) == 0;
> +}
> +
> +__attribute__((noinline, noipa, optimize("O1")))
> +void fun2(int32_t *x, int n)
> +{
> +    for (int i = 0; i < (n & -16); i++)
> +      x[i] = (x[i]&(~255)) == 0;
> +}
> +
> +#define TYPE int32_t
> +#include "bic-bitmask.h"
> +
> +/* { dg-final { scan-tree-dump {<=\s*.+\{ 255,.+\}} dce7 } } */
> +/* { dg-final { scan-tree-dump-not {&\s*.+\{ 4294967290,.+\}} dce7 } } */
> +/* { dg-final { scan-tree-dump-not {\s+bic\s+} dce7 { target { aarch64*-*-* 
> } } } } */
> +
> diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-11.c 
> b/gcc/testsuite/gcc.dg/bic-bitmask-11.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..32553d7ba2f823f7a21237451990d0a216d2f912
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/bic-bitmask-11.c
> @@ -0,0 +1,25 @@
> +/* { dg-do run } */
> +/* { dg-options "-O3 -save-temps -fdump-tree-dce" } */
> +
> +#include <stdint.h>
> +
> +__attribute__((noinline, noipa))
> +void fun1(uint32_t *x, int n)
> +{
> +    for (int i = 0; i < (n & -16); i++)
> +      x[i] = (x[i]&(~255)) != 0;
> +}
> +
> +__attribute__((noinline, noipa, optimize("O1")))
> +void fun2(uint32_t *x, int n)
> +{
> +    for (int i = 0; i < (n & -16); i++)
> +      x[i] = (x[i]&(~255)) != 0;
> +}
> +
> +#include "bic-bitmask.h"
> +
> +/* { dg-final { scan-tree-dump {>\s*.+\{ 255,.+\}} dce7 } } */
> +/* { dg-final { scan-tree-dump-not {&\s*.+\{ 4294967290,.+\}} dce7 } } */
> +/* { dg-final { scan-tree-dump-not {\s+bic\s+} dce7 { target { aarch64*-*-* 
> } } } } */
> +
> diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-12.c 
> b/gcc/testsuite/gcc.dg/bic-bitmask-12.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..e10cbf7fabe2dbf7ce436cdf37b0f8b207c58408
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/bic-bitmask-12.c
> @@ -0,0 +1,17 @@
> +/* { dg-do assemble } */
> +/* { dg-options "-O3 -fdump-tree-dce" } */
> +
> +#include <stdint.h>
> +
> +typedef unsigned int v4si __attribute__ ((vector_size (16)));
> +
> +__attribute__((noinline, noipa))
> +void fun(v4si *x, int n)
> +{
> +    for (int i = 0; i < (n & -16); i++)
> +      x[i] = (x[i]&(~255)) == 0;
> +}
> +
> +/* { dg-final { scan-tree-dump {<=\s*.+\{ 255,.+\}} dce7 } } */
> +/* { dg-final { scan-tree-dump-not {&\s*.+\{ 4294967290,.+\}} dce7 } } */
> +/* { dg-final { scan-tree-dump-not {\s+bic\s+} dce7 { target { aarch64*-*-* 
> } } } } */
> diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-2.c 
> b/gcc/testsuite/gcc.dg/bic-bitmask-2.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..da30fad89f6c8239baa4395b3ffaec0be577e13f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/bic-bitmask-2.c
> @@ -0,0 +1,25 @@
> +/* { dg-do run } */
> +/* { dg-options "-O3 -save-temps -fdump-tree-dce" } */
> +
> +#include <stdint.h>
> +
> +__attribute__((noinline, noipa))
> +void fun1(uint32_t *x, int n)
> +{
> +    for (int i = 0; i < (n & -16); i++)
> +      x[i] = (x[i]&(~255)) == 0;
> +}
> +
> +__attribute__((noinline, noipa, optimize("O1")))
> +void fun2(uint32_t *x, int n)
> +{
> +    for (int i = 0; i < (n & -16); i++)
> +      x[i] = (x[i]&(~255)) == 0;
> +}
> +
> +#include "bic-bitmask.h"
> +
> +/* { dg-final { scan-tree-dump-times {<=\s*.+\{ 255,.+\}} 1 dce7 } } */
> +/* { dg-final { scan-tree-dump-not {&\s*.+\{ 4294967040,.+\}} dce7 } } */
> +/* { dg-final { scan-tree-dump-not {\s+bic\s+} dce7 { target { aarch64*-*-* 
> } } } } */
> +
> diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-3.c 
> b/gcc/testsuite/gcc.dg/bic-bitmask-3.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..da30fad89f6c8239baa4395b3ffaec0be577e13f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/bic-bitmask-3.c
> @@ -0,0 +1,25 @@
> +/* { dg-do run } */
> +/* { dg-options "-O3 -save-temps -fdump-tree-dce" } */
> +
> +#include <stdint.h>
> +
> +__attribute__((noinline, noipa))
> +void fun1(uint32_t *x, int n)
> +{
> +    for (int i = 0; i < (n & -16); i++)
> +      x[i] = (x[i]&(~255)) == 0;
> +}
> +
> +__attribute__((noinline, noipa, optimize("O1")))
> +void fun2(uint32_t *x, int n)
> +{
> +    for (int i = 0; i < (n & -16); i++)
> +      x[i] = (x[i]&(~255)) == 0;
> +}
> +
> +#include "bic-bitmask.h"
> +
> +/* { dg-final { scan-tree-dump-times {<=\s*.+\{ 255,.+\}} 1 dce7 } } */
> +/* { dg-final { scan-tree-dump-not {&\s*.+\{ 4294967040,.+\}} dce7 } } */
> +/* { dg-final { scan-tree-dump-not {\s+bic\s+} dce7 { target { aarch64*-*-* 
> } } } } */
> +
> diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-4.c 
> b/gcc/testsuite/gcc.dg/bic-bitmask-4.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..1bcf23ccf1447d6c8c999ed1eb25ba0a450028e1
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/bic-bitmask-4.c
> @@ -0,0 +1,25 @@
> +/* { dg-do run } */
> +/* { dg-options "-O3 -save-temps -fdump-tree-dce" } */
> +
> +#include <stdint.h>
> +
> +__attribute__((noinline, noipa))
> +void fun1(uint32_t *x, int n)
> +{
> +    for (int i = 0; i < (n & -16); i++)
> +      x[i] = (x[i]&(~255)) >= 0;
> +}
> +
> +__attribute__((noinline, noipa, optimize("O1")))
> +void fun2(uint32_t *x, int n)
> +{
> +    for (int i = 0; i < (n & -16); i++)
> +      x[i] = (x[i]&(~255)) >= 0;
> +}
> +
> +#include "bic-bitmask.h"
> +
> +/* { dg-final { scan-tree-dump-times {=\s*.+\{ 1,.+\}} 1 dce7 } } */
> +/* { dg-final { scan-tree-dump-not {&\s*.+\{ 4294967040,.+\}} dce7 } } */
> +/* { dg-final { scan-tree-dump-not {\s+bic\s+} dce7 { target { aarch64*-*-* 
> } } } } */
> +
> diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-5.c 
> b/gcc/testsuite/gcc.dg/bic-bitmask-5.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..6e5a2fca9992efbc01f8dbbc6f95936e86643028
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/bic-bitmask-5.c
> @@ -0,0 +1,25 @@
> +/* { dg-do run } */
> +/* { dg-options "-O3 -save-temps -fdump-tree-dce" } */
> +
> +#include <stdint.h>
> +
> +__attribute__((noinline, noipa))
> +void fun1(uint32_t *x, int n)
> +{
> +    for (int i = 0; i < (n & -16); i++)
> +      x[i] = (x[i]&(~255)) > 0;
> +}
> +
> +__attribute__((noinline, noipa, optimize("O1")))
> +void fun2(uint32_t *x, int n)
> +{
> +    for (int i = 0; i < (n & -16); i++)
> +      x[i] = (x[i]&(~255)) > 0;
> +}
> +
> +#include "bic-bitmask.h"
> +
> +/* { dg-final { scan-tree-dump-times {>\s*.+\{ 255,.+\}} 1 dce7 } } */
> +/* { dg-final { scan-tree-dump-not {&`s*.+\{ 4294967040,.+\}} dce7 } } */
> +/* { dg-final { scan-tree-dump-not {\s+bic\s+} dce7 { target { aarch64*-*-* 
> } } } } */
> +
> diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-6.c 
> b/gcc/testsuite/gcc.dg/bic-bitmask-6.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..018e7a4348c9fc461106c3d9d01291325d3406c2
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/bic-bitmask-6.c
> @@ -0,0 +1,25 @@
> +/* { dg-do run } */
> +/* { dg-options "-O3 -save-temps -fdump-tree-dce" } */
> +
> +#include <stdint.h>
> +
> +__attribute__((noinline, noipa))
> +void fun1(uint32_t *x, int n)
> +{
> +    for (int i = 0; i < (n & -16); i++)
> +      x[i] = (x[i]&(~255)) <= 0;
> +}
> +
> +__attribute__((noinline, noipa, optimize("O1")))
> +void fun2(uint32_t *x, int n)
> +{
> +    for (int i = 0; i < (n & -16); i++)
> +      x[i] = (x[i]&(~255)) <= 0;
> +}
> +
> +#include "bic-bitmask.h"
> +
> +/* { dg-final { scan-tree-dump-times {<=\s*.+\{ 255,.+\}} 1 dce7 } } */
> +/* { dg-final { scan-tree-dump-not {&\s*.+\{ 4294967040,.+\}} dce7 } } */
> +/* { dg-final { scan-tree-dump-not {\s+bic\s+} dce7 { target { aarch64*-*-* 
> } } } } */
> +
> diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-7.c 
> b/gcc/testsuite/gcc.dg/bic-bitmask-7.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..798678fb7555052c93abc4ca34f617d640f73bb4
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/bic-bitmask-7.c
> @@ -0,0 +1,24 @@
> +/* { dg-do run } */
> +/* { dg-options "-O3 -save-temps -fdump-tree-dce" } */
> +
> +#include <stdint.h>
> +
> +__attribute__((noinline, noipa))
> +void fun1(uint32_t *x, int n)
> +{
> +    for (int i = 0; i < (n & -16); i++)
> +      x[i] = (x[i]&(~1)) < 0;
> +}
> +
> +__attribute__((noinline, noipa, optimize("O1")))
> +void fun2(uint32_t *x, int n)
> +{
> +    for (int i = 0; i < (n & -16); i++)
> +      x[i] = (x[i]&(~1)) < 0;
> +}
> +
> +#include "bic-bitmask.h"
> +
> +/* { dg-final { scan-tree-dump-times {__builtin_memset} 1 dce7 } } */
> +/* { dg-final { scan-tree-dump-not {\s+bic\s+} dce7 { target { aarch64*-*-* 
> } } } } */
> +
> diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-8.c 
> b/gcc/testsuite/gcc.dg/bic-bitmask-8.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..1dabe834ed57dfa0be48c1dc3dbb226092c79a1a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/bic-bitmask-8.c
> @@ -0,0 +1,25 @@
> +/* { dg-do run } */
> +/* { dg-options "-O3 -save-temps -fdump-tree-dce" } */
> +
> +#include <stdint.h>
> +
> +__attribute__((noinline, noipa))
> +void fun1(uint32_t *x, int n)
> +{
> +    for (int i = 0; i < (n & -16); i++)
> +      x[i] = (x[i]&(~1)) != 0;
> +}
> +
> +__attribute__((noinline, noipa, optimize("O1")))
> +void fun2(uint32_t *x, int n)
> +{
> +    for (int i = 0; i < (n & -16); i++)
> +      x[i] = (x[i]&(~1)) != 0;
> +}
> +
> +#include "bic-bitmask.h"
> +
> +/* { dg-final { scan-tree-dump-times {>\s*.+\{ 1,.+\}} 1 dce7 } } */
> +/* { dg-final { scan-tree-dump-not {&\s*.+\{ 4294967294,.+\}} dce7 } } */
> +/* { dg-final { scan-tree-dump-not {\s+bic\s+} dce7 { target { aarch64*-*-* 
> } } } } */
> +
> diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-9.c 
> b/gcc/testsuite/gcc.dg/bic-bitmask-9.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..9c1f8ee0adfc45d1b9fc212138ea26bb6b693e49
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/bic-bitmask-9.c
> @@ -0,0 +1,25 @@
> +/* { dg-do run } */
> +/* { dg-options "-O3 -save-temps -fdump-tree-dce" } */
> +
> +#include <stdint.h>
> +
> +__attribute__((noinline, noipa))
> +void fun1(uint32_t *x, int n)
> +{
> +    for (int i = 0; i < (n & -16); i++)
> +      x[i] = (x[i]&(~5)) == 0;
> +}
> +
> +__attribute__((noinline, noipa, optimize("O1")))
> +void fun2(uint32_t *x, int n)
> +{
> +    for (int i = 0; i < (n & -16); i++)
> +      x[i] = (x[i]&(~5)) == 0;
> +}
> +
> +#include "bic-bitmask.h"
> +
> +/* { dg-final { scan-tree-dump-not {<=\s*.+\{ 4294967289,.+\}} dce7 } } */
> +/* { dg-final { scan-tree-dump {&\s*.+\{ 4294967290,.+\}} dce7 } } */
> +/* { dg-final { scan-tree-dump-not {\s+bic\s+} dce7 { target { aarch64*-*-* 
> } } } } */
> +
> diff --git a/gcc/testsuite/gcc.dg/bic-bitmask.h 
> b/gcc/testsuite/gcc.dg/bic-bitmask.h
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..2b94065c025e0cbf71a21ac9b9d6314e24b0c2d9
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/bic-bitmask.h
> @@ -0,0 +1,43 @@
> +#include <stdio.h>
> +
> +#ifndef N
> +#define N 50
> +#endif
> +
> +#ifndef TYPE
> +#define TYPE uint32_t
> +#endif
> +
> +#ifndef DEBUG
> +#define DEBUG 0
> +#endif
> +
> +#define BASE ((TYPE) -1 < 0 ? -126 : 4)
> +
> +int main ()
> +{
> +  TYPE a[N];
> +  TYPE b[N];
> +
> +  for (int i = 0; i < N; ++i)
> +    {
> +      a[i] = BASE + i * 13;
> +      b[i] = BASE + i * 13;
> +      if (DEBUG)
> +        printf ("%d: 0x%x\n", i, a[i]);
> +    }
> +
> +  fun1 (a, N);
> +  fun2 (b, N);
> +
> +  for (int i = 0; i < N; ++i)
> +    {
> +      if (DEBUG)
> +        printf ("%d = 0x%x == 0x%x\n", i, a[i], b[i]);
> +
> +      if (a[i] != b[i])
> +        __builtin_abort ();
> +    }
> +  return 0;
> +}
> +
> diff --git a/gcc/testsuite/gcc.target/aarch64/bic-bitmask-1.c 
> b/gcc/testsuite/gcc.target/aarch64/bic-bitmask-1.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..568c1ffc8bc4148efaeeba7a45a75ecbd3a7a3dd
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/bic-bitmask-1.c
> @@ -0,0 +1,13 @@
> +/* { dg-do assemble } */
> +/* { dg-options "-O2 -save-temps" } */
> +
> +#include <arm_neon.h>
> +
> +uint32x4_t foo (int32x4_t a)
> +{
> +  int32x4_t cst = vdupq_n_s32 (255);
> +  int32x4_t zero = vdupq_n_s32 (0);
> +  return vceqq_s32 (vbicq_s32 (a, cst), zero);
> +}
> +
> +/* { dg-final { scan-assembler-not {\tbic\t} { xfail { aarch64*-*-* } } } } 
> */
> 
> 
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)

Reply via email to