On Tue, 5 Oct 2021, Tamar Christina wrote: > Hi All, > > Here's a new version of the patch handling both scalar and vector modes > and non-uniform constant vectors. > > Bootstrapped and regtested on aarch64-none-linux-gnu and > x86_64-pc-linux-gnu with no regressions. > > In order to not break IVopts and CSE I have added a > requirement for the scalar version to be single use.
OK. Thanks, Richard. > Thanks, > Tamar > > gcc/ChangeLog: > > * tree.c (bitmask_inv_cst_vector_p): New. > * tree.h (bitmask_inv_cst_vector_p): New. > * match.pd: Use it in new bitmask compare pattern. > > gcc/testsuite/ChangeLog: > > * gcc.dg/bic-bitmask-10.c: New test. > * gcc.dg/bic-bitmask-11.c: New test. > * gcc.dg/bic-bitmask-12.c: New test. > * gcc.dg/bic-bitmask-13.c: New test. > * gcc.dg/bic-bitmask-14.c: New test. > * gcc.dg/bic-bitmask-15.c: New test. > * gcc.dg/bic-bitmask-16.c: New test. > * gcc.dg/bic-bitmask-17.c: New test. > * gcc.dg/bic-bitmask-18.c: New test. > * gcc.dg/bic-bitmask-19.c: New test. > * gcc.dg/bic-bitmask-2.c: New test. > * gcc.dg/bic-bitmask-20.c: New test. > * gcc.dg/bic-bitmask-21.c: New test. > * gcc.dg/bic-bitmask-22.c: New test. > * gcc.dg/bic-bitmask-23.c: New test. > * gcc.dg/bic-bitmask-3.c: New test. > * gcc.dg/bic-bitmask-4.c: New test. > * gcc.dg/bic-bitmask-5.c: New test. > * gcc.dg/bic-bitmask-6.c: New test. > * gcc.dg/bic-bitmask-7.c: New test. > * gcc.dg/bic-bitmask-8.c: New test. > * gcc.dg/bic-bitmask-9.c: New test. > * gcc.dg/bic-bitmask.h: New test. > * gcc.target/aarch64/bic-bitmask-1.c: New test. > > --- inline copy of patch -- > > diff --git a/gcc/match.pd b/gcc/match.pd > index > 0fcfd0ea62c043dc217d0d560ce5b7e569b70e7d..7d2a24dbc5e9644a09968f877e12a824d8ba1caa > 100644 > --- a/gcc/match.pd > +++ b/gcc/match.pd > @@ -37,7 +37,8 @@ along with GCC; see the file COPYING3. If not see > integer_pow2p > uniform_integer_cst_p > HONOR_NANS > - uniform_vector_p) > + uniform_vector_p > + bitmask_inv_cst_vector_p) > > /* Operator lists. */ > (define_operator_list tcc_comparison > @@ -4900,6 +4901,24 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > (eqcmp (bit_and @1 { wide_int_to_tree (ty, mask - rhs); }) > { build_zero_cst (ty); })))))) > > +/* Transform comparisons of the form (X & Y) CMP 0 to X CMP2 Z > + where ~Y + 1 == pow2 and Z = ~Y. 
*/ > +(for cst (VECTOR_CST INTEGER_CST) > + (for cmp (le eq ne ge gt) > + icmp (le le gt le gt) > + (simplify > + (cmp (bit_and:c@2 @0 cst@1) integer_zerop) > + (with { tree csts = bitmask_inv_cst_vector_p (@1); } > + (switch > + (if (csts && TYPE_UNSIGNED (TREE_TYPE (@1)) > + && (VECTOR_TYPE_P (TREE_TYPE (@1)) || single_use (@2))) > + (icmp @0 { csts; })) > + (if (csts && !TYPE_UNSIGNED (TREE_TYPE (@1)) > + && (cmp == EQ_EXPR || cmp == NE_EXPR) > + && (VECTOR_TYPE_P (TREE_TYPE (@1)) || single_use (@2))) > + (with { tree utype = unsigned_type_for (TREE_TYPE (@1)); } > + (icmp (convert:utype @0) { csts; })))))))) > + > /* -A CMP -B -> B CMP A. */ > (for cmp (tcc_comparison) > scmp (swapped_tcc_comparison) > diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-10.c > b/gcc/testsuite/gcc.dg/bic-bitmask-10.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..76a22a2313137a2a75dd711c2c15c2d3a34e15aa > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/bic-bitmask-10.c > @@ -0,0 +1,26 @@ > +/* { dg-do run } */ > +/* { dg-options "-O3 -save-temps -fdump-tree-dce" } */ > + > +#include <stdint.h> > + > +__attribute__((noinline, noipa)) > +void fun1(int32_t *x, int n) > +{ > + for (int i = 0; i < (n & -16); i++) > + x[i] = (x[i]&(~255)) == 0; > +} > + > +__attribute__((noinline, noipa, optimize("O1"))) > +void fun2(int32_t *x, int n) > +{ > + for (int i = 0; i < (n & -16); i++) > + x[i] = (x[i]&(~255)) == 0; > +} > + > +#define TYPE int32_t > +#include "bic-bitmask.h" > + > +/* { dg-final { scan-tree-dump {<=\s*.+\{ 255,.+\}} dce7 } } */ > +/* { dg-final { scan-tree-dump-not {&\s*.+\{ 4294967290,.+\}} dce7 } } */ > +/* { dg-final { scan-tree-dump-not {\s+bic\s+} dce7 { target { aarch64*-*-* > } } } } */ > + > diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-11.c > b/gcc/testsuite/gcc.dg/bic-bitmask-11.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..32553d7ba2f823f7a21237451990d0a216d2f912 > --- /dev/null > +++ 
b/gcc/testsuite/gcc.dg/bic-bitmask-11.c > @@ -0,0 +1,25 @@ > +/* { dg-do run } */ > +/* { dg-options "-O3 -save-temps -fdump-tree-dce" } */ > + > +#include <stdint.h> > + > +__attribute__((noinline, noipa)) > +void fun1(uint32_t *x, int n) > +{ > + for (int i = 0; i < (n & -16); i++) > + x[i] = (x[i]&(~255)) != 0; > +} > + > +__attribute__((noinline, noipa, optimize("O1"))) > +void fun2(uint32_t *x, int n) > +{ > + for (int i = 0; i < (n & -16); i++) > + x[i] = (x[i]&(~255)) != 0; > +} > + > +#include "bic-bitmask.h" > + > +/* { dg-final { scan-tree-dump {>\s*.+\{ 255,.+\}} dce7 } } */ > +/* { dg-final { scan-tree-dump-not {&\s*.+\{ 4294967290,.+\}} dce7 } } */ > +/* { dg-final { scan-tree-dump-not {\s+bic\s+} dce7 { target { aarch64*-*-* > } } } } */ > + > diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-12.c > b/gcc/testsuite/gcc.dg/bic-bitmask-12.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..e10cbf7fabe2dbf7ce436cdf37b0f8b207c58408 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/bic-bitmask-12.c > @@ -0,0 +1,17 @@ > +/* { dg-do assemble } */ > +/* { dg-options "-O3 -fdump-tree-dce" } */ > + > +#include <stdint.h> > + > +typedef unsigned int v4si __attribute__ ((vector_size (16))); > + > +__attribute__((noinline, noipa)) > +void fun(v4si *x, int n) > +{ > + for (int i = 0; i < (n & -16); i++) > + x[i] = (x[i]&(~255)) == 0; > +} > + > +/* { dg-final { scan-tree-dump {<=\s*.+\{ 255,.+\}} dce7 } } */ > +/* { dg-final { scan-tree-dump-not {&\s*.+\{ 4294967290,.+\}} dce7 } } */ > +/* { dg-final { scan-tree-dump-not {\s+bic\s+} dce7 { target { aarch64*-*-* > } } } } */ > diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-13.c > b/gcc/testsuite/gcc.dg/bic-bitmask-13.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..8ebaa30238c761b8831685209a7490f06591c000 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/bic-bitmask-13.c > @@ -0,0 +1,24 @@ > +/* { dg-do run } */ > +/* { dg-options "-O0 -save-temps -fdump-tree-dce" } */ > + > 
+#include <stdint.h> > + > +__attribute__((noinline, noipa)) > +void fun1(uint32_t *x, int n) > +{ > + for (int i = 0; i < (n & -16); i++) > + x[i] = (x[i]&(~255)) == 0; > +} > + > +__attribute__((noinline, noipa, optimize("O1"))) > +void fun2(uint32_t *x, int n) > +{ > + for (int i = 0; i < (n & -16); i++) > + x[i] = (x[i]&(~255)) == 0; > +} > + > +#include "bic-bitmask.h" > + > +/* { dg-final { scan-tree-dump-times {<=\s* 255} 1 dce7 } } */ > +/* { dg-final { scan-tree-dump-not {&\s* 4294967040} dce7 } } */ > + > diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-14.c > b/gcc/testsuite/gcc.dg/bic-bitmask-14.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..0a2789433f8bc45a590d136179b8ee4ec5cda1c1 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/bic-bitmask-14.c > @@ -0,0 +1,24 @@ > +/* { dg-do run } */ > +/* { dg-options "-O1 -save-temps -fdump-tree-dce" } */ > + > +#include <stdint.h> > + > +__attribute__((noinline, noipa)) > +void fun1(uint32_t *x, int n) > +{ > + for (int i = 0; i < (n & -16); i++) > + x[i] = (x[i]&(~255)) == 0; > +} > + > +__attribute__((noinline, noipa, optimize("O0"))) > +void fun2(uint32_t *x, int n) > +{ > + for (int i = 0; i < (n & -16); i++) > + x[i] = (x[i]&(~255)) == 0; > +} > + > +#include "bic-bitmask.h" > + > +/* { dg-final { scan-tree-dump-times {<=\s* 255} 1 dce7 } } */ > +/* { dg-final { scan-tree-dump-not {&\s* 4294967040} dce7 } } */ > + > diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-15.c > b/gcc/testsuite/gcc.dg/bic-bitmask-15.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..53a2c986b00f159ae5fa839798850ac42e9b9504 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/bic-bitmask-15.c > @@ -0,0 +1,24 @@ > +/* { dg-do run } */ > +/* { dg-options "-O1 -save-temps -fdump-tree-dce" } */ > + > +#include <stdint.h> > + > +__attribute__((noinline, noipa)) > +void fun1(uint32_t *x, int n) > +{ > + for (int i = 0; i < (n & -16); i++) > + x[i] = (x[i]&(~255)) >= 0; > +} > + > 
+__attribute__((noinline, noipa, optimize("O0"))) > +void fun2(uint32_t *x, int n) > +{ > + for (int i = 0; i < (n & -16); i++) > + x[i] = (x[i]&(~255)) >= 0; > +} > + > +#include "bic-bitmask.h" > + > +/* { dg-final { scan-tree-dump-times {=\s* 1} 1 dce7 } } */ > +/* { dg-final { scan-tree-dump-not {&\s* 4294967040} dce7 } } */ > + > diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-16.c > b/gcc/testsuite/gcc.dg/bic-bitmask-16.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..fff4670e2a47106c614c1224b8e8aac091d6e821 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/bic-bitmask-16.c > @@ -0,0 +1,24 @@ > +/* { dg-do run } */ > +/* { dg-options "-O1 -save-temps -fdump-tree-dce" } */ > + > +#include <stdint.h> > + > +__attribute__((noinline, noipa)) > +void fun1(uint32_t *x, int n) > +{ > + for (int i = 0; i < (n & -16); i++) > + x[i] = (x[i]&(~255)) > 0; > +} > + > +__attribute__((noinline, noipa, optimize("O0"))) > +void fun2(uint32_t *x, int n) > +{ > + for (int i = 0; i < (n & -16); i++) > + x[i] = (x[i]&(~255)) > 0; > +} > + > +#include "bic-bitmask.h" > + > +/* { dg-final { scan-tree-dump-times {>\s* 255} 1 dce7 } } */ > +/* { dg-final { scan-tree-dump-not {&\s* 4294967040} dce7 } } */ > + > diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-17.c > b/gcc/testsuite/gcc.dg/bic-bitmask-17.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..7b877fde017de0fb7aeabd4152a1593f07d52e71 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/bic-bitmask-17.c > @@ -0,0 +1,24 @@ > +/* { dg-do run } */ > +/* { dg-options "-O1 -save-temps -fdump-tree-dce" } */ > + > +#include <stdint.h> > + > +__attribute__((noinline, noipa)) > +void fun1(uint32_t *x, int n) > +{ > + for (int i = 0; i < (n & -16); i++) > + x[i] = (x[i]&(~255)) <= 0; > +} > + > +__attribute__((noinline, noipa, optimize("O0"))) > +void fun2(uint32_t *x, int n) > +{ > + for (int i = 0; i < (n & -16); i++) > + x[i] = (x[i]&(~255)) <= 0; > +} > + > +#include "bic-bitmask.h" > + > +/* { 
dg-final { scan-tree-dump-times {<=\s* 255} 1 dce7 } } */ > +/* { dg-final { scan-tree-dump-not {&\s* 4294967040} dce7 } } */ > + > diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-18.c > b/gcc/testsuite/gcc.dg/bic-bitmask-18.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..8455bf5286a7cc9f08713489e29b5f2b6f6fc012 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/bic-bitmask-18.c > @@ -0,0 +1,24 @@ > +/* { dg-do run } */ > +/* { dg-options "-O1 -save-temps -fdump-tree-dce" } */ > + > +#include <stdint.h> > + > +__attribute__((noinline, noipa)) > +void fun1(uint32_t *x, int n) > +{ > + for (int i = 0; i < (n & -16); i++) > + x[i] = (x[i]&(~1)) < 0; > +} > + > +__attribute__((noinline, noipa, optimize("O0"))) > +void fun2(uint32_t *x, int n) > +{ > + for (int i = 0; i < (n & -16); i++) > + x[i] = (x[i]&(~1)) < 0; > +} > + > +#include "bic-bitmask.h" > + > +/* { dg-final { scan-tree-dump-times {= 0} 1 dce7 } } */ > +/* { dg-final { scan-tree-dump-not {\s+bic\s+} dce7 { target { aarch64*-*-* > } } } } */ > + > diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-19.c > b/gcc/testsuite/gcc.dg/bic-bitmask-19.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..ad266cec0d10a1afc0bdacd8762d328bff1f6ea4 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/bic-bitmask-19.c > @@ -0,0 +1,24 @@ > +/* { dg-do run } */ > +/* { dg-options "-O1 -save-temps -fdump-tree-dce" } */ > + > +#include <stdint.h> > + > +__attribute__((noinline, noipa)) > +void fun1(uint32_t *x, int n) > +{ > + for (int i = 0; i < (n & -16); i++) > + x[i] = (x[i]&(~1)) != 0; > +} > + > +__attribute__((noinline, noipa, optimize("O0"))) > +void fun2(uint32_t *x, int n) > +{ > + for (int i = 0; i < (n & -16); i++) > + x[i] = (x[i]&(~1)) != 0; > +} > + > +#include "bic-bitmask.h" > + > +/* { dg-final { scan-tree-dump-times {>\s* 1} 1 dce7 } } */ > +/* { dg-final { scan-tree-dump-not {&\s* 4294967294} dce7 } } */ > + > diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-2.c > 
b/gcc/testsuite/gcc.dg/bic-bitmask-2.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..da30fad89f6c8239baa4395b3ffaec0be577e13f > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/bic-bitmask-2.c > @@ -0,0 +1,25 @@ > +/* { dg-do run } */ > +/* { dg-options "-O3 -save-temps -fdump-tree-dce" } */ > + > +#include <stdint.h> > + > +__attribute__((noinline, noipa)) > +void fun1(uint32_t *x, int n) > +{ > + for (int i = 0; i < (n & -16); i++) > + x[i] = (x[i]&(~255)) == 0; > +} > + > +__attribute__((noinline, noipa, optimize("O1"))) > +void fun2(uint32_t *x, int n) > +{ > + for (int i = 0; i < (n & -16); i++) > + x[i] = (x[i]&(~255)) == 0; > +} > + > +#include "bic-bitmask.h" > + > +/* { dg-final { scan-tree-dump-times {<=\s*.+\{ 255,.+\}} 1 dce7 } } */ > +/* { dg-final { scan-tree-dump-not {&\s*.+\{ 4294967040,.+\}} dce7 } } */ > +/* { dg-final { scan-tree-dump-not {\s+bic\s+} dce7 { target { aarch64*-*-* > } } } } */ > + > diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-20.c > b/gcc/testsuite/gcc.dg/bic-bitmask-20.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..befd25cb4aac6fe206110e7ca80816dd6fc0ed94 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/bic-bitmask-20.c > @@ -0,0 +1,24 @@ > +/* { dg-do run } */ > +/* { dg-options "-O1 -save-temps -fdump-tree-dce" } */ > + > +#include <stdint.h> > + > +__attribute__((noinline, noipa)) > +void fun1(uint32_t *x, int n) > +{ > + for (int i = 0; i < (n & -16); i++) > + x[i] = (x[i]&(~5)) == 0; > +} > + > +__attribute__((noinline, noipa, optimize("O0"))) > +void fun2(uint32_t *x, int n) > +{ > + for (int i = 0; i < (n & -16); i++) > + x[i] = (x[i]&(~5)) == 0; > +} > + > +#include "bic-bitmask.h" > + > +/* { dg-final { scan-tree-dump-not {<=\s* 4294967289} dce7 } } */ > +/* { dg-final { scan-tree-dump {&\s* 4294967290} dce7 } } */ > + > diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-21.c > b/gcc/testsuite/gcc.dg/bic-bitmask-21.c > new file mode 100644 > index > 
0000000000000000000000000000000000000000..b037ffc248eef1509c642abb0087b77882679150 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/bic-bitmask-21.c > @@ -0,0 +1,25 @@ > +/* { dg-do run } */ > +/* { dg-options "-O1 -save-temps -fdump-tree-dce" } */ > + > +#include <stdint.h> > + > +__attribute__((noinline, noipa)) > +void fun1(int32_t *x, int n) > +{ > + for (int i = 0; i < (n & -16); i++) > + x[i] = (x[i]&(~255)) == 0; > +} > + > +__attribute__((noinline, noipa, optimize("O0"))) > +void fun2(int32_t *x, int n) > +{ > + for (int i = 0; i < (n & -16); i++) > + x[i] = (x[i]&(~255)) == 0; > +} > + > +#define TYPE int32_t > +#include "bic-bitmask.h" > + > +/* { dg-final { scan-tree-dump {<=\s* 255} dce7 } } */ > +/* { dg-final { scan-tree-dump-not {&\s* 4294967290} dce7 } } */ > + > diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-22.c > b/gcc/testsuite/gcc.dg/bic-bitmask-22.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..20027a6367a1dcec12492435f260250a7b54aca1 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/bic-bitmask-22.c > @@ -0,0 +1,24 @@ > +/* { dg-do run } */ > +/* { dg-options "-O1 -save-temps -fdump-tree-dce" } */ > + > +#include <stdint.h> > + > +__attribute__((noinline, noipa)) > +void fun1(uint32_t *x, int n) > +{ > + for (int i = 0; i < (n & -16); i++) > + x[i] = (x[i]&(~255)) != 0; > +} > + > +__attribute__((noinline, noipa, optimize("O0"))) > +void fun2(uint32_t *x, int n) > +{ > + for (int i = 0; i < (n & -16); i++) > + x[i] = (x[i]&(~255)) != 0; > +} > + > +#include "bic-bitmask.h" > + > +/* { dg-final { scan-tree-dump {>\s* 255} dce7 } } */ > +/* { dg-final { scan-tree-dump-not {&\s* 4294967290} dce7 } } */ > + > diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-23.c > b/gcc/testsuite/gcc.dg/bic-bitmask-23.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..9aef4baed796cf7bf4998d1b4f4ddfbc457add29 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/bic-bitmask-23.c > @@ -0,0 +1,16 @@ > +/* { dg-do assemble } */ > +/* 
{ dg-options "-O1 -fdump-tree-dce" } */ > + > +#include <stdint.h> > + > +typedef unsigned int v4si __attribute__ ((vector_size (16))); > + > +__attribute__((noinline, noipa)) > +v4si fun(v4si x) > +{ > + v4si mask = { 255, 15, 1, 0xFFFF }; > + v4si zeros = {0}; > + return (x & ~mask) == zeros; > +} > + > +/* { dg-final { scan-tree-dump {<=\s*.+\{ 255, 15, 1, 65535 \}} dce7 } } */ > diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-3.c > b/gcc/testsuite/gcc.dg/bic-bitmask-3.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..da30fad89f6c8239baa4395b3ffaec0be577e13f > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/bic-bitmask-3.c > @@ -0,0 +1,25 @@ > +/* { dg-do run } */ > +/* { dg-options "-O3 -save-temps -fdump-tree-dce" } */ > + > +#include <stdint.h> > + > +__attribute__((noinline, noipa)) > +void fun1(uint32_t *x, int n) > +{ > + for (int i = 0; i < (n & -16); i++) > + x[i] = (x[i]&(~255)) == 0; > +} > + > +__attribute__((noinline, noipa, optimize("O1"))) > +void fun2(uint32_t *x, int n) > +{ > + for (int i = 0; i < (n & -16); i++) > + x[i] = (x[i]&(~255)) == 0; > +} > + > +#include "bic-bitmask.h" > + > +/* { dg-final { scan-tree-dump-times {<=\s*.+\{ 255,.+\}} 1 dce7 } } */ > +/* { dg-final { scan-tree-dump-not {&\s*.+\{ 4294967040,.+\}} dce7 } } */ > +/* { dg-final { scan-tree-dump-not {\s+bic\s+} dce7 { target { aarch64*-*-* > } } } } */ > + > diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-4.c > b/gcc/testsuite/gcc.dg/bic-bitmask-4.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..1bcf23ccf1447d6c8c999ed1eb25ba0a450028e1 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/bic-bitmask-4.c > @@ -0,0 +1,25 @@ > +/* { dg-do run } */ > +/* { dg-options "-O3 -save-temps -fdump-tree-dce" } */ > + > +#include <stdint.h> > + > +__attribute__((noinline, noipa)) > +void fun1(uint32_t *x, int n) > +{ > + for (int i = 0; i < (n & -16); i++) > + x[i] = (x[i]&(~255)) >= 0; > +} > + > +__attribute__((noinline, noipa, optimize("O1"))) > 
+void fun2(uint32_t *x, int n) > +{ > + for (int i = 0; i < (n & -16); i++) > + x[i] = (x[i]&(~255)) >= 0; > +} > + > +#include "bic-bitmask.h" > + > +/* { dg-final { scan-tree-dump-times {=\s*.+\{ 1,.+\}} 1 dce7 } } */ > +/* { dg-final { scan-tree-dump-not {&\s*.+\{ 4294967040,.+\}} dce7 } } */ > +/* { dg-final { scan-tree-dump-not {\s+bic\s+} dce7 { target { aarch64*-*-* > } } } } */ > + > diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-5.c > b/gcc/testsuite/gcc.dg/bic-bitmask-5.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..d053727076abedefdecfda7c4fea6f92d54a94a5 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/bic-bitmask-5.c > @@ -0,0 +1,25 @@ > +/* { dg-do run } */ > +/* { dg-options "-O3 -save-temps -fdump-tree-dce" } */ > + > +#include <stdint.h> > + > +__attribute__((noinline, noipa)) > +void fun1(uint32_t *x, int n) > +{ > + for (int i = 0; i < (n & -16); i++) > + x[i] = (x[i]&(~255)) > 0; > +} > + > +__attribute__((noinline, noipa, optimize("O1"))) > +void fun2(uint32_t *x, int n) > +{ > + for (int i = 0; i < (n & -16); i++) > + x[i] = (x[i]&(~255)) > 0; > +} > + > +#include "bic-bitmask.h" > + > +/* { dg-final { scan-tree-dump-times {>\s*.+\{ 255,.+\}} 1 dce7 } } */ > +/* { dg-final { scan-tree-dump-not {&\s*.+\{ 4294967040,.+\}} dce7 } } */ > +/* { dg-final { scan-tree-dump-not {\s+bic\s+} dce7 { target { aarch64*-*-* > } } } } */ > + > diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-6.c > b/gcc/testsuite/gcc.dg/bic-bitmask-6.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..018e7a4348c9fc461106c3d9d01291325d3406c2 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/bic-bitmask-6.c > @@ -0,0 +1,25 @@ > +/* { dg-do run } */ > +/* { dg-options "-O3 -save-temps -fdump-tree-dce" } */ > + > +#include <stdint.h> > + > +__attribute__((noinline, noipa)) > +void fun1(uint32_t *x, int n) > +{ > + for (int i = 0; i < (n & -16); i++) > + x[i] = (x[i]&(~255)) <= 0; > +} > + > +__attribute__((noinline, noipa, 
optimize("O1"))) > +void fun2(uint32_t *x, int n) > +{ > + for (int i = 0; i < (n & -16); i++) > + x[i] = (x[i]&(~255)) <= 0; > +} > + > +#include "bic-bitmask.h" > + > +/* { dg-final { scan-tree-dump-times {<=\s*.+\{ 255,.+\}} 1 dce7 } } */ > +/* { dg-final { scan-tree-dump-not {&\s*.+\{ 4294967040,.+\}} dce7 } } */ > +/* { dg-final { scan-tree-dump-not {\s+bic\s+} dce7 { target { aarch64*-*-* > } } } } */ > + > diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-7.c > b/gcc/testsuite/gcc.dg/bic-bitmask-7.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..798678fb7555052c93abc4ca34f617d640f73bb4 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/bic-bitmask-7.c > @@ -0,0 +1,24 @@ > +/* { dg-do run } */ > +/* { dg-options "-O3 -save-temps -fdump-tree-dce" } */ > + > +#include <stdint.h> > + > +__attribute__((noinline, noipa)) > +void fun1(uint32_t *x, int n) > +{ > + for (int i = 0; i < (n & -16); i++) > + x[i] = (x[i]&(~1)) < 0; > +} > + > +__attribute__((noinline, noipa, optimize("O1"))) > +void fun2(uint32_t *x, int n) > +{ > + for (int i = 0; i < (n & -16); i++) > + x[i] = (x[i]&(~1)) < 0; > +} > + > +#include "bic-bitmask.h" > + > +/* { dg-final { scan-tree-dump-times {__builtin_memset} 1 dce7 } } */ > +/* { dg-final { scan-tree-dump-not {\s+bic\s+} dce7 { target { aarch64*-*-* > } } } } */ > + > diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-8.c > b/gcc/testsuite/gcc.dg/bic-bitmask-8.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..1dabe834ed57dfa0be48c1dc3dbb226092c79a1a > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/bic-bitmask-8.c > @@ -0,0 +1,25 @@ > +/* { dg-do run } */ > +/* { dg-options "-O3 -save-temps -fdump-tree-dce" } */ > + > +#include <stdint.h> > + > +__attribute__((noinline, noipa)) > +void fun1(uint32_t *x, int n) > +{ > + for (int i = 0; i < (n & -16); i++) > + x[i] = (x[i]&(~1)) != 0; > +} > + > +__attribute__((noinline, noipa, optimize("O1"))) > +void fun2(uint32_t *x, int n) > +{ > + for (int i = 0; i 
< (n & -16); i++) > + x[i] = (x[i]&(~1)) != 0; > +} > + > +#include "bic-bitmask.h" > + > +/* { dg-final { scan-tree-dump-times {>\s*.+\{ 1,.+\}} 1 dce7 } } */ > +/* { dg-final { scan-tree-dump-not {&\s*.+\{ 4294967294,.+\}} dce7 } } */ > +/* { dg-final { scan-tree-dump-not {\s+bic\s+} dce7 { target { aarch64*-*-* > } } } } */ > + > diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-9.c > b/gcc/testsuite/gcc.dg/bic-bitmask-9.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..9c1f8ee0adfc45d1b9fc212138ea26bb6b693e49 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/bic-bitmask-9.c > @@ -0,0 +1,25 @@ > +/* { dg-do run } */ > +/* { dg-options "-O3 -save-temps -fdump-tree-dce" } */ > + > +#include <stdint.h> > + > +__attribute__((noinline, noipa)) > +void fun1(uint32_t *x, int n) > +{ > + for (int i = 0; i < (n & -16); i++) > + x[i] = (x[i]&(~5)) == 0; > +} > + > +__attribute__((noinline, noipa, optimize("O1"))) > +void fun2(uint32_t *x, int n) > +{ > + for (int i = 0; i < (n & -16); i++) > + x[i] = (x[i]&(~5)) == 0; > +} > + > +#include "bic-bitmask.h" > + > +/* { dg-final { scan-tree-dump-not {<=\s*.+\{ 4294967289,.+\}} dce7 } } */ > +/* { dg-final { scan-tree-dump {&\s*.+\{ 4294967290,.+\}} dce7 } } */ > +/* { dg-final { scan-tree-dump-not {\s+bic\s+} dce7 { target { aarch64*-*-* > } } } } */ > + > diff --git a/gcc/testsuite/gcc.dg/bic-bitmask.h > b/gcc/testsuite/gcc.dg/bic-bitmask.h > new file mode 100644 > index > 0000000000000000000000000000000000000000..faf80b974db07a7d817a615cc64a35f1020e9764 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/bic-bitmask.h > @@ -0,0 +1,43 @@ > +#include <stdio.h> > + > +#ifndef N > +#define N 65 > +#endif > + > +#ifndef TYPE > +#define TYPE uint32_t > +#endif > + > +#ifndef DEBUG > +#define DEBUG 0 > +#endif > + > +#define BASE ((TYPE) -1 < 0 ? 
-126 : 4) > + > +int main () > +{ > + TYPE a[N]; > + TYPE b[N]; > + > + for (int i = 0; i < N; ++i) > + { > + a[i] = BASE + i * 13; > + b[i] = BASE + i * 13; > + if (DEBUG) > + printf ("%d: 0x%x\n", i, a[i]); > + } > + > + fun1 (a, N); > + fun2 (b, N); > + > + for (int i = 0; i < N; ++i) > + { > + if (DEBUG) > + printf ("%d = 0x%x == 0x%x\n", i, a[i], b[i]); > + > + if (a[i] != b[i]) > + __builtin_abort (); > + } > + return 0; > +} > + > diff --git a/gcc/testsuite/gcc.target/aarch64/bic-bitmask-1.c > b/gcc/testsuite/gcc.target/aarch64/bic-bitmask-1.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..568c1ffc8bc4148efaeeba7a45a75ecbd3a7a3dd > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/bic-bitmask-1.c > @@ -0,0 +1,13 @@ > +/* { dg-do assemble } */ > +/* { dg-options "-O2 -save-temps" } */ > + > +#include <arm_neon.h> > + > +uint32x4_t foo (int32x4_t a) > +{ > + int32x4_t cst = vdupq_n_s32 (255); > + int32x4_t zero = vdupq_n_s32 (0); > + return vceqq_s32 (vbicq_s32 (a, cst), zero); > +} > + > +/* { dg-final { scan-assembler-not {\tbic\t} { xfail { aarch64*-*-* } } } } > */ > diff --git a/gcc/tree.h b/gcc/tree.h > index > 905417fd17b1721f6c65d3cdf66a5fd5f920de7f..3583d51409cad9471fe45de64806e78d855bd97c > 100644 > --- a/gcc/tree.h > +++ b/gcc/tree.h > @@ -4862,6 +4862,11 @@ extern bool integer_minus_onep (const_tree); > > extern bool integer_pow2p (const_tree); > > +/* Checks to see if T is a constant or a constant vector and if each element > E > + adheres to ~E + 1 == pow2 then return ~E otherwise NULL_TREE. */ > + > +extern tree bitmask_inv_cst_vector_p (tree); > + > /* integer_nonzerop (tree x) is nonzero if X is an integer constant > with a nonzero value. 
*/ > > diff --git a/gcc/tree.c b/gcc/tree.c > index > cba3bca41b3a50557939267b7a06df04b5d042b7..801f121a368b088d0f769f16f2ed29e320e71556 > 100644 > --- a/gcc/tree.c > +++ b/gcc/tree.c > @@ -10246,6 +10246,59 @@ uniform_integer_cst_p (tree t) > return NULL_TREE; > } > > +/* Checks to see if T is a constant or a constant vector and if each element > E > + adheres to ~E + 1 == pow2 then return ~E otherwise NULL_TREE. */ > + > +tree > +bitmask_inv_cst_vector_p (tree t) > +{ > + > + tree_code code = TREE_CODE (t); > + tree type = TREE_TYPE (t); > + > + if (!INTEGRAL_TYPE_P (type) > + && !VECTOR_INTEGER_TYPE_P (type)) > + return NULL_TREE; > + > + unsigned HOST_WIDE_INT nelts = 1; > + tree cst; > + unsigned int idx = 0; > + bool uniform = uniform_integer_cst_p (t); > + tree newtype = unsigned_type_for (type); > + tree_vector_builder builder; > + if (code == INTEGER_CST) > + cst = t; > + else > + { > + if (!VECTOR_CST_NELTS (t).is_constant (&nelts)) > + return NULL_TREE; > + > + cst = vector_cst_elt (t, 0); > + builder.new_vector (newtype, nelts, 1); > + } > + > + tree ty = unsigned_type_for (TREE_TYPE (cst)); > + > + do { > + if (idx > 0) > + cst = vector_cst_elt (t, idx); > + wide_int icst = wi::to_wide (cst); > + wide_int inv = wi::bit_not (icst); > + icst = wi::add (1, inv); > + if (wi::popcount (icst) != 1) > + return NULL_TREE; > + > + tree newcst = wide_int_to_tree (ty, inv); > + > + if (uniform) > + return build_uniform_cst (newtype, newcst); > + > + builder.quick_push (newcst); > + } while (++idx < nelts); > + > + return builder.build (); > +} > + > /* If VECTOR_CST T has a single nonzero element, return the index of that > element, otherwise return -1. */ > > -- Richard Biener <rguent...@suse.de> SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg, Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)