On Thu, Oct 21, 2021 at 1:09 PM Hongtao Liu <crazy...@gmail.com> wrote:
>
>  i is
>
> On Wed, Oct 13, 2021 at 8:34 PM Richard Biener via Gcc-patches
> <gcc-patches@gcc.gnu.org> wrote:
> >
> > On Sun, Oct 10, 2021 at 3:49 PM H.J. Lu <hjl.to...@gmail.com> wrote:
> > >
> > > Changes in v4:
> > >
> > > 1. Bypass redundant check when inputs have been transformed to the
> > > equivalent canonical form with valid bit operation.
> > >
> > > Changes in v3:
> > >
> > > 1.  Check invalid bit operation.
> > >
> > > commit adedd5c173388ae505470df152b9cb3947339566
> > > Author: Jakub Jelinek <ja...@redhat.com>
> > > Date:   Tue May 3 13:37:25 2016 +0200
> > >
> > >     re PR target/49244 (__sync or __atomic builtins will not emit 'lock 
> > > bts/btr/btc')
> > >
> > > optimized bit test on __atomic_fetch_or_* and __atomic_fetch_and_* returns
> > > with lock bts/btr/btc by turning
> > >
> > >   mask_2 = 1 << cnt_1;
> > >   _4 = __atomic_fetch_or_* (ptr_6, mask_2, _3);
> > >   _5 = _4 & mask_2;
> > >
> > > into
> > >
> > > >   _4 = .ATOMIC_BIT_TEST_AND_SET (ptr_6, cnt_1, 0, _3);
> > >   _5 = _4;
> > >
> > > and
> > >
> > >   mask_6 = 1 << bit_5(D);
> > >   _1 = ~mask_6;
> > >   _2 = __atomic_fetch_and_4 (v_8(D), _1, 0);
> > >   _3 = _2 & mask_6;
> > >   _4 = _3 != 0;
> > >
> > > into
> > >
> > >   mask_6 = 1 << bit_5(D);
> > >   _1 = ~mask_6;
> > >   _11 = .ATOMIC_BIT_TEST_AND_RESET (v_8(D), bit_5(D), 1, 0);
> > >   _4 = _11 != 0;
> > >
> > > > But it failed to optimize many equivalent, but slightly different cases:
> > >
> > > 1.
> > >   _1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
> > >   _4 = (_Bool) _1;
> > > 2.
> > >   _1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
> > >   _4 = (_Bool) _1;
> > > 3.
> > >   _1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
> > >   _7 = ~_1;
> > >   _5 = (_Bool) _7;
> > > 4.
> > >   _1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
> > >   _7 = ~_1;
> > >   _5 = (_Bool) _7;
> > > 5.
> > >   _1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
> > >   _2 = (int) _1;
> > >   _7 = ~_2;
> > >   _5 = (_Bool) _7;
> > > 6.
> > >   _1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
> > >   _2 = (int) _1;
> > >   _7 = ~_2;
> > >   _5 = (_Bool) _7;
> > > 7.
> > > >   _1 = __atomic_fetch_or_4 (ptr_6, mask, _3);
> > >   _2 = (int) _1;
> > >   _5 = _2 & mask;
> > > 8.
> > >   _1 = __atomic_fetch_or_4 (ptr_6, 0x80000000, _3);
> > >   _5 = (signed int) _1;
> > >   _4 = _5 < 0;
> > > 9.
> > >   _1 = __atomic_fetch_and_4 (ptr_6, 0x7fffffff, _3);
> > >   _5 = (signed int) _1;
> > >   _4 = _5 < 0;
> > > 10.
> > >   _1 = 1 << bit_4(D);
> > >   mask_5 = (unsigned int) _1;
> > >   _2 = __atomic_fetch_or_4 (v_7(D), mask_5, 0);
> > >   _3 = _2 & mask_5;
> > > 11.
> > >   mask_7 = 1 << bit_6(D);
> > >   _1 = ~mask_7;
> > >   _2 = (unsigned int) _1;
> > >   _3 = __atomic_fetch_and_4 (v_9(D), _2, 0);
> > >   _4 = (int) _3;
> > >   _5 = _4 & mask_7;
> > >
> > > We make
> > >
> > >   mask_2 = 1 << cnt_1;
> > >   _4 = __atomic_fetch_or_* (ptr_6, mask_2, _3);
> > >   _5 = _4 & mask_2;
> > >
> > > and
> > >
> > >   mask_6 = 1 << bit_5(D);
> > >   _1 = ~mask_6;
> > >   _2 = __atomic_fetch_and_4 (v_8(D), _1, 0);
> > >   _3 = _2 & mask_6;
> > >   _4 = _3 != 0;
> > >
> > > the canonical forms for this optimization and transform cases 1-9 to the
> > > equivalent canonical form.  For cases 10 and 11, we simply remove the cast
> > > > before __atomic_fetch_or_4/__atomic_fetch_and_4, yielding
> > >
> > >   _1 = 1 << bit_4(D);
> > >   _2 = __atomic_fetch_or_4 (v_7(D), _1, 0);
> > >   _3 = _2 & _1;
> > >
> > > and
> > >
> > >   mask_7 = 1 << bit_6(D);
> > >   _1 = ~mask_7;
> > >   _3 = __atomic_fetch_and_4 (v_9(D), _1, 0);
> > >   _6 = _3 & mask_7;
> > >   _5 = (int) _6;
> > >
> > > gcc/
> > >
> > >         PR middle-end/102566
> > >         * tree-ssa-ccp.c (convert_atomic_bit_not): New function.
> > > >         (optimize_atomic_bit_test_and): Transform equivalent, but slightly
> > >         different cases to their canonical forms.
> > >
> > > gcc/testsuite/
> > >
> > >         PR middle-end/102566
> > >         * g++.target/i386/pr102566-1.C: New test.
> > >         * g++.target/i386/pr102566-2.C: Likewise.
> > >         * g++.target/i386/pr102566-3.C: Likewise.
> > >         * g++.target/i386/pr102566-4.C: Likewise.
> > >         * g++.target/i386/pr102566-5a.C: Likewise.
> > >         * g++.target/i386/pr102566-5b.C: Likewise.
> > >         * g++.target/i386/pr102566-6a.C: Likewise.
> > >         * g++.target/i386/pr102566-6b.C: Likewise.
> > >         * gcc.target/i386/pr102566-1a.c: Likewise.
> > >         * gcc.target/i386/pr102566-1b.c: Likewise.
> > >         * gcc.target/i386/pr102566-2.c: Likewise.
> > >         * gcc.target/i386/pr102566-3a.c: Likewise.
> > >         * gcc.target/i386/pr102566-3b.c: Likewise.
> > >         * gcc.target/i386/pr102566-4.c: Likewise.
> > >         * gcc.target/i386/pr102566-5.c: Likewise.
> > >         * gcc.target/i386/pr102566-6.c: Likewise.
> > >         * gcc.target/i386/pr102566-7.c: Likewise.
> > >         * gcc.target/i386/pr102566-8a.c: Likewise.
> > >         * gcc.target/i386/pr102566-8b.c: Likewise.
> > >         * gcc.target/i386/pr102566-9a.c: Likewise.
> > >         * gcc.target/i386/pr102566-9b.c: Likewise.
> > >         * gcc.target/i386/pr102566-10a.c: Likewise.
> > >         * gcc.target/i386/pr102566-10b.c: Likewise.
> > >         * gcc.target/i386/pr102566-11.c: Likewise.
> > >         * gcc.target/i386/pr102566-12.c: Likewise.
> > > ---
> > >  gcc/testsuite/g++.target/i386/pr102566-1.C   |  31 ++
> > >  gcc/testsuite/g++.target/i386/pr102566-2.C   |  31 ++
> > >  gcc/testsuite/g++.target/i386/pr102566-3.C   |  31 ++
> > >  gcc/testsuite/g++.target/i386/pr102566-4.C   |  29 ++
> > >  gcc/testsuite/g++.target/i386/pr102566-5a.C  |  31 ++
> > >  gcc/testsuite/g++.target/i386/pr102566-5b.C  |  31 ++
> > >  gcc/testsuite/g++.target/i386/pr102566-6a.C  |  31 ++
> > >  gcc/testsuite/g++.target/i386/pr102566-6b.C  |  31 ++
> > >  gcc/testsuite/gcc.target/i386/pr102566-10a.c |  15 +
> > >  gcc/testsuite/gcc.target/i386/pr102566-10b.c |  15 +
> > >  gcc/testsuite/gcc.target/i386/pr102566-11.c  |  28 ++
> > >  gcc/testsuite/gcc.target/i386/pr102566-12.c  |  28 ++
> > >  gcc/testsuite/gcc.target/i386/pr102566-1a.c  | 188 +++++++
> > >  gcc/testsuite/gcc.target/i386/pr102566-1b.c  | 107 ++++
> > >  gcc/testsuite/gcc.target/i386/pr102566-2.c   |  32 ++
> > >  gcc/testsuite/gcc.target/i386/pr102566-3a.c  |  15 +
> > >  gcc/testsuite/gcc.target/i386/pr102566-3b.c  |  15 +
> > >  gcc/testsuite/gcc.target/i386/pr102566-4.c   |  15 +
> > >  gcc/testsuite/gcc.target/i386/pr102566-5.c   |  15 +
> > >  gcc/testsuite/gcc.target/i386/pr102566-6.c   |  32 ++
> > >  gcc/testsuite/gcc.target/i386/pr102566-7.c   |  30 ++
> > >  gcc/testsuite/gcc.target/i386/pr102566-8a.c  |  32 ++
> > >  gcc/testsuite/gcc.target/i386/pr102566-8b.c  |  32 ++
> > >  gcc/testsuite/gcc.target/i386/pr102566-9a.c  |  32 ++
> > >  gcc/testsuite/gcc.target/i386/pr102566-9b.c  |  32 ++
> > >  gcc/tree-ssa-ccp.c                           | 503 +++++++++++++++++--
> > >  26 files changed, 1375 insertions(+), 37 deletions(-)
> > >  create mode 100644 gcc/testsuite/g++.target/i386/pr102566-1.C
> > >  create mode 100644 gcc/testsuite/g++.target/i386/pr102566-2.C
> > >  create mode 100644 gcc/testsuite/g++.target/i386/pr102566-3.C
> > >  create mode 100644 gcc/testsuite/g++.target/i386/pr102566-4.C
> > >  create mode 100644 gcc/testsuite/g++.target/i386/pr102566-5a.C
> > >  create mode 100644 gcc/testsuite/g++.target/i386/pr102566-5b.C
> > >  create mode 100644 gcc/testsuite/g++.target/i386/pr102566-6a.C
> > >  create mode 100644 gcc/testsuite/g++.target/i386/pr102566-6b.C
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-10a.c
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-10b.c
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-11.c
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-12.c
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-1a.c
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-1b.c
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-2.c
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-3a.c
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-3b.c
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-4.c
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-5.c
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-6.c
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-7.c
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-8a.c
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-8b.c
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-9a.c
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-9b.c
> > >
> > > diff --git a/gcc/testsuite/g++.target/i386/pr102566-1.C 
> > > b/gcc/testsuite/g++.target/i386/pr102566-1.C
> > > new file mode 100644
> > > index 00000000000..94a66d717cc
> > > --- /dev/null
> > > +++ b/gcc/testsuite/g++.target/i386/pr102566-1.C
> > > @@ -0,0 +1,31 @@
> > > +/* { dg-do compile { target c++11 } } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +#include <atomic>
> > > +
> > > +bool
> > > +tbit0 (std::atomic<int> &i)
> > > +{
> > > +#define BIT (1 << 0)
> > > +  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +tbit30 (std::atomic<int> &i)
> > > +{
> > > +#define BIT (1 << 30)
> > > +  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +tbit31 (std::atomic<int> &i)
> > > +{
> > > +#define BIT (1 << 31)
> > > +  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
> > > +#undef BIT
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
> > > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > > diff --git a/gcc/testsuite/g++.target/i386/pr102566-2.C 
> > > b/gcc/testsuite/g++.target/i386/pr102566-2.C
> > > new file mode 100644
> > > index 00000000000..4f2aea961c2
> > > --- /dev/null
> > > +++ b/gcc/testsuite/g++.target/i386/pr102566-2.C
> > > @@ -0,0 +1,31 @@
> > > +/* { dg-do compile { target c++11 } } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +#include <atomic>
> > > +
> > > +bool
> > > +tbit0 (std::atomic<unsigned int> &i)
> > > +{
> > > +#define BIT (1 << 0)
> > > +  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +tbit30 (std::atomic<unsigned int> &i)
> > > +{
> > > +#define BIT (1 << 30)
> > > +  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +tbit31 (std::atomic<unsigned int> &i)
> > > +{
> > > +#define BIT (1 << 31)
> > > +  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
> > > +#undef BIT
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
> > > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > > diff --git a/gcc/testsuite/g++.target/i386/pr102566-3.C 
> > > b/gcc/testsuite/g++.target/i386/pr102566-3.C
> > > new file mode 100644
> > > index 00000000000..e88921dd155
> > > --- /dev/null
> > > +++ b/gcc/testsuite/g++.target/i386/pr102566-3.C
> > > @@ -0,0 +1,31 @@
> > > +/* { dg-do compile { target c++11 } } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +#include <atomic>
> > > +
> > > +bool
> > > +tbit0 (std::atomic<unsigned int> &i)
> > > +{
> > > +#define BIT (1 << 0)
> > > +  return !(i.fetch_or(BIT, std::memory_order_relaxed) & BIT);
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +tbit30 (std::atomic<unsigned int> &i)
> > > +{
> > > +#define BIT (1 << 30)
> > > +  return !(i.fetch_or(BIT, std::memory_order_relaxed) & BIT);
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +tbit31 (std::atomic<unsigned int> &i)
> > > +{
> > > +#define BIT (1 << 31)
> > > +  return !(i.fetch_or(BIT, std::memory_order_relaxed) & BIT);
> > > +#undef BIT
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
> > > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > > diff --git a/gcc/testsuite/g++.target/i386/pr102566-4.C 
> > > b/gcc/testsuite/g++.target/i386/pr102566-4.C
> > > new file mode 100644
> > > index 00000000000..44d1362ac2e
> > > --- /dev/null
> > > +++ b/gcc/testsuite/g++.target/i386/pr102566-4.C
> > > @@ -0,0 +1,29 @@
> > > +/* { dg-do compile { target c++11 } } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +#include <atomic>
> > > +
> > > +typedef int __attribute__ ((mode (__word__))) int_type;
> > > +
> > > +#define BIT (1 << 0)
> > > +
> > > +bool
> > > +tbit0 (std::atomic<int_type> &i)
> > > +{
> > > +  return i.fetch_or(BIT, std::memory_order_relaxed) & ~1;
> > > +}
> > > +
> > > +bool
> > > +tbit30 (std::atomic<int_type> &i)
> > > +{
> > > +  return i.fetch_or(BIT, std::memory_order_relaxed) & ~2;
> > > +}
> > > +
> > > +bool
> > > +tbit31 (std::atomic<int_type> &i)
> > > +{
> > > +  return i.fetch_or(BIT, std::memory_order_relaxed) & ~4;
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*cmpxchg" 3 } } */
> > > +/* { dg-final { scan-assembler-not "bts" } } */
> > > diff --git a/gcc/testsuite/g++.target/i386/pr102566-5a.C 
> > > b/gcc/testsuite/g++.target/i386/pr102566-5a.C
> > > new file mode 100644
> > > index 00000000000..f9595bee2ab
> > > --- /dev/null
> > > +++ b/gcc/testsuite/g++.target/i386/pr102566-5a.C
> > > @@ -0,0 +1,31 @@
> > > +/* { dg-do compile { target c++11 } } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +#include <atomic>
> > > +
> > > +bool
> > > +tbit0 (std::atomic<unsigned int> &i)
> > > +{
> > > +#define BIT (1 << 0)
> > > +  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +tbit30 (std::atomic<unsigned int> &i)
> > > +{
> > > +#define BIT (1 << 30)
> > > +  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +tbit31 (std::atomic<unsigned int> &i)
> > > +{
> > > +#define BIT (1 << 31)
> > > +  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
> > > +#undef BIT
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 3 } } */
> > > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > > diff --git a/gcc/testsuite/g++.target/i386/pr102566-5b.C 
> > > b/gcc/testsuite/g++.target/i386/pr102566-5b.C
> > > new file mode 100644
> > > index 00000000000..d917b27a918
> > > --- /dev/null
> > > +++ b/gcc/testsuite/g++.target/i386/pr102566-5b.C
> > > @@ -0,0 +1,31 @@
> > > +/* { dg-do compile { target { c++11 && { ! ia32 } } } } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +#include <atomic>
> > > +
> > > +bool
> > > +tbit0 (std::atomic<unsigned long long> &i)
> > > +{
> > > +#define BIT (1ll << 0)
> > > +  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +tbit30 (std::atomic<unsigned long long> &i)
> > > +{
> > > +#define BIT (1ll << 30)
> > > +  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +tbit31 (std::atomic<unsigned long long> &i)
> > > +{
> > > +#define BIT (1ll << 63)
> > > +  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
> > > +#undef BIT
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 3 } } */
> > > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > > diff --git a/gcc/testsuite/g++.target/i386/pr102566-6a.C 
> > > b/gcc/testsuite/g++.target/i386/pr102566-6a.C
> > > new file mode 100644
> > > index 00000000000..01d495eda23
> > > --- /dev/null
> > > +++ b/gcc/testsuite/g++.target/i386/pr102566-6a.C
> > > @@ -0,0 +1,31 @@
> > > +/* { dg-do compile { target c++11 } } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +#include <atomic>
> > > +
> > > +bool
> > > +tbit0 (std::atomic<unsigned int> &i)
> > > +{
> > > +#define BIT (1 << 0)
> > > +  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +tbit30 (std::atomic<unsigned int> &i)
> > > +{
> > > +#define BIT (1 << 30)
> > > +  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +tbit31 (std::atomic<unsigned int> &i)
> > > +{
> > > +#define BIT (1 << 31)
> > > +  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
> > > +#undef BIT
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 3 } } */
> > > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > > diff --git a/gcc/testsuite/g++.target/i386/pr102566-6b.C 
> > > b/gcc/testsuite/g++.target/i386/pr102566-6b.C
> > > new file mode 100644
> > > index 00000000000..adc11fcbf2d
> > > --- /dev/null
> > > +++ b/gcc/testsuite/g++.target/i386/pr102566-6b.C
> > > @@ -0,0 +1,31 @@
> > > +/* { dg-do compile { target { c++11 && { ! ia32 } } } } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +#include <atomic>
> > > +
> > > +bool
> > > +tbit0 (std::atomic<unsigned long long> &i)
> > > +{
> > > +#define BIT (1ll << 0)
> > > +  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +tbit30 (std::atomic<unsigned long long> &i)
> > > +{
> > > +#define BIT (1ll << 30)
> > > +  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +tbit31 (std::atomic<unsigned long long> &i)
> > > +{
> > > +#define BIT (1ll << 63)
> > > +  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
> > > +#undef BIT
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 3 } } */
> > > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-10a.c 
> > > b/gcc/testsuite/gcc.target/i386/pr102566-10a.c
> > > new file mode 100644
> > > index 00000000000..1c1f86a9659
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr102566-10a.c
> > > @@ -0,0 +1,15 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +#include <stdatomic.h>
> > > +#include <stdbool.h>
> > > +
> > > +bool
> > > +foo (_Atomic int *v, int bit)
> > > +{
> > > +  int mask = 1 << bit;
> > > +  return atomic_fetch_and_explicit (v, ~mask, memory_order_relaxed) & 
> > > mask;
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 1 } } */
> > > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-10b.c 
> > > b/gcc/testsuite/gcc.target/i386/pr102566-10b.c
> > > new file mode 100644
> > > index 00000000000..0bf39824ea6
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr102566-10b.c
> > > @@ -0,0 +1,15 @@
> > > +/* { dg-do compile { target { ! ia32 } } } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +#include <stdatomic.h>
> > > +#include <stdbool.h>
> > > +
> > > +bool
> > > +foo (_Atomic long long int *v, int bit)
> > > +{
> > > +  long long int mask = 1ll << bit;
> > > +  return atomic_fetch_and_explicit (v, ~mask, memory_order_relaxed) & 
> > > mask;
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 1 } } */
> > > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-11.c 
> > > b/gcc/testsuite/gcc.target/i386/pr102566-11.c
> > > new file mode 100644
> > > index 00000000000..2c8f8c4e59a
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr102566-11.c
> > > @@ -0,0 +1,28 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +#include <stdatomic.h>
> > > +#include <stdbool.h>
> > > +
> > > +#define MASK 0x1234
> > > +
> > > +bool
> > > +foo1 (_Atomic int *v)
> > > +{
> > > +  return atomic_fetch_or_explicit (v, MASK, memory_order_relaxed) & MASK;
> > > +}
> > > +
> > > +bool
> > > +foo2 (_Atomic unsigned int *v, int mask)
> > > +{
> > > +  return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
> > > +}
> > > +
> > > +bool
> > > +foo3 (_Atomic unsigned int *v, int mask)
> > > +{
> > > +  return !(atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & 
> > > mask);
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*cmpxchg" 3 } } */
> > > +/* { dg-final { scan-assembler-not "bts" } } */
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-12.c 
> > > b/gcc/testsuite/gcc.target/i386/pr102566-12.c
> > > new file mode 100644
> > > index 00000000000..4603a77612c
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr102566-12.c
> > > @@ -0,0 +1,28 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +#include <stdatomic.h>
> > > +#include <stdbool.h>
> > > +
> > > +#define MASK 0x1234
> > > +
> > > +bool
> > > +foo1 (_Atomic long *v)
> > > +{
> > > +  return atomic_fetch_and_explicit (v, ~MASK, memory_order_relaxed) & 
> > > MASK;
> > > +}
> > > +
> > > +bool
> > > +foo2 (_Atomic long *v, long mask)
> > > +{
> > > +  return atomic_fetch_and_explicit (v, ~mask, memory_order_relaxed) & 
> > > mask;
> > > +}
> > > +
> > > +bool
> > > +foo3 (_Atomic long *v, long mask)
> > > +{
> > > +  return !(atomic_fetch_and_explicit (v, ~mask, memory_order_relaxed) & 
> > > mask);
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*cmpxchg" 3 } } */
> > > +/* { dg-final { scan-assembler-not "btr" } } */
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-1a.c 
> > > b/gcc/testsuite/gcc.target/i386/pr102566-1a.c
> > > new file mode 100644
> > > index 00000000000..a915de354e5
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr102566-1a.c
> > > @@ -0,0 +1,188 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +void bar (void);
> > > +
> > > +__attribute__((noinline, noclone)) int
> > > +f1 (int *a, int bit)
> > > +{
> > > +  int mask = 1 << bit;
> > > +  return (__sync_fetch_and_or (a, mask) & mask) != 0;
> > > +}
> > > +
> > > +__attribute__((noinline, noclone)) int
> > > +f2 (int *a, int bit)
> > > +{
> > > +  int mask = 1 << bit;
> > > +  int t1 = __atomic_fetch_or (a, mask, __ATOMIC_RELAXED);
> > > +  int t2 = t1 & mask;
> > > +  return t2 != 0;
> > > +}
> > > +
> > > +__attribute__((noinline, noclone)) long int
> > > +f3 (long int *a, int bit)
> > > +{
> > > +  long int mask = 1l << bit;
> > > +  return (__atomic_fetch_or (a, mask, __ATOMIC_SEQ_CST) & mask) == 0;
> > > +}
> > > +
> > > +__attribute__((noinline, noclone)) int
> > > +f4 (int *a)
> > > +{
> > > +  int mask = 1 << 7;
> > > +  return (__sync_fetch_and_or (a, mask) & mask) != 0;
> > > +}
> > > +
> > > +__attribute__((noinline, noclone)) int
> > > +f5 (int *a)
> > > +{
> > > +  int mask = 1 << 13;
> > > +  return (__atomic_fetch_or (a, mask, __ATOMIC_RELAXED) & mask) != 0;
> > > +}
> > > +
> > > +__attribute__((noinline, noclone)) int
> > > +f6 (int *a)
> > > +{
> > > +  int mask = 1 << 0;
> > > +  return (__atomic_fetch_or (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
> > > +}
> > > +
> > > +__attribute__((noinline, noclone)) void
> > > +f7 (int *a, int bit)
> > > +{
> > > +  int mask = 1 << bit;
> > > +  if ((__sync_fetch_and_xor (a, mask) & mask) != 0)
> > > +    bar ();
> > > +}
> > > +
> > > +__attribute__((noinline, noclone)) void
> > > +f8 (int *a, int bit)
> > > +{
> > > +  int mask = 1 << bit;
> > > +  if ((__atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & mask) == 0)
> > > +    bar ();
> > > +}
> > > +
> > > +__attribute__((noinline, noclone)) int
> > > +f9 (int *a, int bit)
> > > +{
> > > +  int mask = 1 << bit;
> > > +  return (__atomic_fetch_xor (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
> > > +}
> > > +
> > > +__attribute__((noinline, noclone)) int
> > > +f10 (int *a)
> > > +{
> > > +  int mask = 1 << 7;
> > > +  return (__sync_fetch_and_xor (a, mask) & mask) != 0;
> > > +}
> > > +
> > > +__attribute__((noinline, noclone)) int
> > > +f11 (int *a)
> > > +{
> > > +  int mask = 1 << 13;
> > > +  return (__atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & mask) != 0;
> > > +}
> > > +
> > > +__attribute__((noinline, noclone)) int
> > > +f12 (int *a)
> > > +{
> > > +  int mask = 1 << 0;
> > > +  return (__atomic_fetch_xor (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
> > > +}
> > > +
> > > +__attribute__((noinline, noclone)) int
> > > +f13 (int *a, int bit)
> > > +{
> > > +  int mask = 1 << bit;
> > > +  return (__sync_fetch_and_and (a, ~mask) & mask) != 0;
> > > +}
> > > +
> > > +__attribute__((noinline, noclone)) int
> > > +f14 (int *a, int bit)
> > > +{
> > > +  int mask = 1 << bit;
> > > +  return (__atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask) != 0;
> > > +}
> > > +
> > > +__attribute__((noinline, noclone)) int
> > > +f15 (int *a, int bit)
> > > +{
> > > +  int mask = 1 << bit;
> > > +  return (__atomic_fetch_and (a, ~mask, __ATOMIC_SEQ_CST) & mask) != 0;
> > > +}
> > > +
> > > +__attribute__((noinline, noclone)) int
> > > +f16 (int *a)
> > > +{
> > > +  int mask = 1 << 7;
> > > +  return (__sync_fetch_and_and (a, ~mask) & mask) != 0;
> > > +}
> > > +
> > > +__attribute__((noinline, noclone)) int
> > > +f17 (int *a)
> > > +{
> > > +  int mask = 1 << 13;
> > > +  return (__atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask) != 0;
> > > +}
> > > +
> > > +__attribute__((noinline, noclone)) int
> > > +f18 (int *a)
> > > +{
> > > +  int mask = 1 << 0;
> > > +  return (__atomic_fetch_and (a, ~mask, __ATOMIC_SEQ_CST) & mask) != 0;
> > > +}
> > > +
> > > +__attribute__((noinline, noclone)) long int
> > > +f19 (long int *a, int bit)
> > > +{
> > > +  long int mask = 1l << bit;
> > > +  return (__atomic_xor_fetch (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
> > > +}
> > > +
> > > +__attribute__((noinline, noclone)) long int
> > > +f20 (long int *a)
> > > +{
> > > +  long int mask = 1l << 7;
> > > +  return (__atomic_xor_fetch (a, mask, __ATOMIC_SEQ_CST) & mask) == 0;
> > > +}
> > > +
> > > +__attribute__((noinline, noclone)) int
> > > +f21 (int *a, int bit)
> > > +{
> > > +  int mask = 1 << bit;
> > > +  return (__sync_fetch_and_or (a, mask) & mask);
> > > +}
> > > +
> > > +__attribute__((noinline, noclone)) long int
> > > +f22 (long int *a)
> > > +{
> > > +  long int mask = 1l << 7;
> > > +  return (__atomic_xor_fetch (a, mask, __ATOMIC_SEQ_CST) & mask);
> > > +}
> > > +
> > > +__attribute__((noinline, noclone)) long int
> > > +f23 (long int *a)
> > > +{
> > > +  long int mask = 1l << 7;
> > > +  return (__atomic_fetch_xor (a, mask, __ATOMIC_SEQ_CST) & mask);
> > > +}
> > > +
> > > +__attribute__((noinline, noclone)) short int
> > > +f24 (short int *a)
> > > +{
> > > +  short int mask = 1 << 7;
> > > +  return (__sync_fetch_and_or (a, mask) & mask) != 0;
> > > +}
> > > +
> > > +__attribute__((noinline, noclone)) short int
> > > +f25 (short int *a)
> > > +{
> > > +  short int mask = 1 << 7;
> > > +  return (__atomic_fetch_or (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*bts" 9 } } */
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btc" 10 } } */
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btr" 6 } } */
> > > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-1b.c 
> > > b/gcc/testsuite/gcc.target/i386/pr102566-1b.c
> > > new file mode 100644
> > > index 00000000000..c4dab8135c7
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr102566-1b.c
> > > @@ -0,0 +1,107 @@
> > > +/* { dg-do run } */
> > > +/* { dg-options "-O2 -g" } */
> > > +
> > > +int cnt;
> > > +
> > > +__attribute__((noinline, noclone)) void
> > > +bar (void)
> > > +{
> > > +  cnt++;
> > > +}
> > > +
> > > +#include "pr102566-1a.c"
> > > +
> > > +int a;
> > > +long int b;
> > > +unsigned long int c;
> > > +unsigned short int d;
> > > +
> > > +int
> > > +main ()
> > > +{
> > > +  __atomic_store_n (&a, 15, __ATOMIC_RELAXED);
> > > +  if (f1 (&a, 2) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 15
> > > +      || f1 (&a, 4) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 31)
> > > +    __builtin_abort ();
> > > +  if (f2 (&a, 1) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 31
> > > +      || f2 (&a, 5) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 63)
> > > +    __builtin_abort ();
> > > +  __atomic_store_n (&b, 24, __ATOMIC_RELAXED);
> > > +  if (f3 (&b, 2) != 1 || __atomic_load_n (&b, __ATOMIC_RELAXED) != 28
> > > +      || f3 (&b, 3) != 0 || __atomic_load_n (&b, __ATOMIC_RELAXED) != 28)
> > > +    __builtin_abort ();
> > > +  __atomic_store_n (&a, 0, __ATOMIC_RELAXED);
> > > +  if (f4 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 128
> > > +      || f4 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 128)
> > > +    __builtin_abort ();
> > > +  if (f5 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8320
> > > +      || f5 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8320)
> > > +    __builtin_abort ();
> > > +  if (f6 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321
> > > +      || f6 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> > > +    __builtin_abort ();
> > > +  if (cnt != 0
> > > +      || (f7 (&a, 7), cnt) != 1 || __atomic_load_n (&a, 
> > > __ATOMIC_RELAXED) != 8193
> > > +      || (f7 (&a, 7), cnt) != 1 || __atomic_load_n (&a, 
> > > __ATOMIC_RELAXED) != 8321)
> > > +    __builtin_abort ();
> > > +  if ((f8 (&a, 7), cnt) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) 
> > > != 8193
> > > +      || (f8 (&a, 7), cnt) != 2 || __atomic_load_n (&a, 
> > > __ATOMIC_RELAXED) != 8321)
> > > +    __builtin_abort ();
> > > +  if (f9 (&a, 13) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 129
> > > +      || f9 (&a, 13) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 
> > > 8321)
> > > +    __builtin_abort ();
> > > +  if (f10 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
> > > +      || f10 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> > > +    __builtin_abort ();
> > > +  if (f11 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 129
> > > +      || f11 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> > > +    __builtin_abort ();
> > > +  if (f12 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8320
> > > +      || f12 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> > > +    __builtin_abort ();
> > > +  if (f13 (&a, 7) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
> > > +      || f13 (&a, 7) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 
> > > 8193)
> > > +    __builtin_abort ();
> > > +  if (f14 (&a, 13) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1
> > > +      || f14 (&a, 13) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 
> > > 1)
> > > +    __builtin_abort ();
> > > +  if (f15 (&a, 0) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0
> > > +      || f15 (&a, 0) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0)
> > > +    __builtin_abort ();
> > > +  __atomic_store_n (&a, 8321, __ATOMIC_RELAXED);
> > > +  if (f16 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
> > > +      || f16 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193)
> > > +    __builtin_abort ();
> > > +  if (f17 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1
> > > +      || f17 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1)
> > > +    __builtin_abort ();
> > > +  if (f18 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0
> > > +      || f18 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0)
> > > +    __builtin_abort ();
> > > +  if (f19 (&c, 7) != 1 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 128
> > > +      || f19 (&c, 7) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 0)
> > > +    __builtin_abort ();
> > > +  if (f20 (&c) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 128
> > > +      || f20 (&c) != 1 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 0)
> > > +    __builtin_abort ();
> > > +  __atomic_store_n (&a, 128, __ATOMIC_RELAXED);
> > > +  if (f21 (&a, 4) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 144
> > > +      || f21 (&a, 4) != 16 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 
> > > 144)
> > > +    __builtin_abort ();
> > > +  __atomic_store_n (&c, 1, __ATOMIC_RELAXED);
> > > +  if (f22 (&c) != 128 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 129
> > > +      || f22 (&c) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 1)
> > > +    __builtin_abort ();
> > > +  if (f23 (&c) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 129
> > > +      || f23 (&c) != 128 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 1)
> > > +    __builtin_abort ();
> > > +  if (f24 (&d) != 0 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 128
> > > +      || f24 (&d) != 1 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 128)
> > > +    __builtin_abort ();
> > > +  __atomic_store_n (&d, 1, __ATOMIC_RELAXED);
> > > +  if (f25 (&d) != 0 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 129
> > > +      || f25 (&d) != 1 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 129
> > > +      || cnt != 2)
> > > +    __builtin_abort ();
> > > +  return 0;
> > > +}
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-2.c 
> > > b/gcc/testsuite/gcc.target/i386/pr102566-2.c
> > > new file mode 100644
> > > index 00000000000..00a7c349f2a
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr102566-2.c
> > > @@ -0,0 +1,32 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +#include <stdatomic.h>
> > > +#include <stdbool.h>
> > > +
> > > +bool
> > > +foo0 (_Atomic int *v)
> > > +{
> > > +#define BIT (1 << 0)
> > > +  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT;
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +foo30 (_Atomic int *v)
> > > +{
> > > +#define BIT (1 << 30)
> > > +  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT;
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +foo31 (_Atomic int *v)
> > > +{
> > > +#define BIT (1 << 31)
> > > +  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT;
> > > +#undef BIT
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
> > > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-3a.c 
> > > b/gcc/testsuite/gcc.target/i386/pr102566-3a.c
> > > new file mode 100644
> > > index 00000000000..8bf1cd6e1bd
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr102566-3a.c
> > > @@ -0,0 +1,15 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +#include <stdatomic.h>
> > > +#include <stdbool.h>
> > > +
> > > +bool
> > > +foo (_Atomic int *v, int bit)
> > > +{
> > > +  int mask = 1 << bit;
> > > +  return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 1 } } */
> > > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-3b.c 
> > > b/gcc/testsuite/gcc.target/i386/pr102566-3b.c
> > > new file mode 100644
> > > index 00000000000..d155ed367a1
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr102566-3b.c
> > > @@ -0,0 +1,15 @@
> > > +/* { dg-do compile { target { ! ia32 } } } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +#include <stdatomic.h>
> > > +#include <stdbool.h>
> > > +
> > > +bool
> > > +foo (_Atomic long long int *v, int bit)
> > > +{
> > > +  long long int mask = 1ll << bit;
> > > +  return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsq" 1 } } */
> > > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-4.c 
> > > b/gcc/testsuite/gcc.target/i386/pr102566-4.c
> > > new file mode 100644
> > > index 00000000000..2668ccf827c
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr102566-4.c
> > > @@ -0,0 +1,15 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +#include <stdatomic.h>
> > > +#include <stdbool.h>
> > > +
> > > +bool
> > > +foo (_Atomic int *v, int bit)
> > > +{
> > > +  unsigned int mask = 1 << bit;
> > > +  return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 1 } } */
> > > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-5.c 
> > > b/gcc/testsuite/gcc.target/i386/pr102566-5.c
> > > new file mode 100644
> > > index 00000000000..8bf1cd6e1bd
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr102566-5.c
> > > @@ -0,0 +1,15 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +#include <stdatomic.h>
> > > +#include <stdbool.h>
> > > +
> > > +bool
> > > +foo (_Atomic int *v, int bit)
> > > +{
> > > +  int mask = 1 << bit;
> > > +  return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 1 } } */
> > > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-6.c 
> > > b/gcc/testsuite/gcc.target/i386/pr102566-6.c
> > > new file mode 100644
> > > index 00000000000..3dfe55ac683
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr102566-6.c
> > > @@ -0,0 +1,32 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +#include <stdatomic.h>
> > > +#include <stdbool.h>
> > > +
> > > +bool
> > > +foo0 (_Atomic int *v)
> > > +{
> > > +#define BIT (1 << 0)
> > > +  return !(atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & 
> > > BIT);
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +foo30 (_Atomic int *v)
> > > +{
> > > +#define BIT (1 << 30)
> > > +  return !(atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & 
> > > BIT);
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +foo31 (_Atomic int *v)
> > > +{
> > > +#define BIT (1 << 31)
> > > +  return !(atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & 
> > > BIT);
> > > +#undef BIT
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
> > > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-7.c 
> > > b/gcc/testsuite/gcc.target/i386/pr102566-7.c
> > > new file mode 100644
> > > index 00000000000..6bc0ae0f320
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr102566-7.c
> > > @@ -0,0 +1,30 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +#include <stdatomic.h>
> > > +#include <stdbool.h>
> > > +
> > > +typedef int __attribute__ ((mode (__word__))) int_type;
> > > +
> > > +#define BIT (1 << 0)
> > > +
> > > +bool
> > > +foo0 (_Atomic int_type *v)
> > > +{
> > > +  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & ~1;
> > > +}
> > > +
> > > +bool
> > > +foo1 (_Atomic int_type *v)
> > > +{
> > > +  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & ~2;
> > > +}
> > > +
> > > +bool
> > > +foo2 (_Atomic int_type *v)
> > > +{
> > > +  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & ~3;
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*cmpxchg" 3 } } */
> > > +/* { dg-final { scan-assembler-not "bts" } } */
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-8a.c 
> > > b/gcc/testsuite/gcc.target/i386/pr102566-8a.c
> > > new file mode 100644
> > > index 00000000000..168e3db78c9
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr102566-8a.c
> > > @@ -0,0 +1,32 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +#include <stdatomic.h>
> > > +#include <stdbool.h>
> > > +
> > > +bool
> > > +foo0 (_Atomic int *v)
> > > +{
> > > +#define BIT (1 << 0)
> > > +  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +foo30 (_Atomic int *v)
> > > +{
> > > +#define BIT (1 << 30)
> > > +  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +foo31 (_Atomic int *v)
> > > +{
> > > +#define BIT (1 << 31)
> > > +  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
> > > +#undef BIT
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 3 } } */
> > > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-8b.c 
> > > b/gcc/testsuite/gcc.target/i386/pr102566-8b.c
> > > new file mode 100644
> > > index 00000000000..392da3098e0
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr102566-8b.c
> > > @@ -0,0 +1,32 @@
> > > +/* { dg-do compile { target { ! ia32 } } } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +#include <stdatomic.h>
> > > +#include <stdbool.h>
> > > +
> > > +bool
> > > +foo0 (_Atomic long long *v)
> > > +{
> > > +#define BIT (1ll << 0)
> > > +  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +foo30 (_Atomic long long *v)
> > > +{
> > > +#define BIT (1ll << 62)
> > > +  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +foo31 (_Atomic long long *v)
> > > +{
> > > +#define BIT (1ll << 63)
> > > +  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
> > > +#undef BIT
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 3 } } */
> > > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-9a.c 
> > > b/gcc/testsuite/gcc.target/i386/pr102566-9a.c
> > > new file mode 100644
> > > index 00000000000..3fa2a3ef043
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr102566-9a.c
> > > @@ -0,0 +1,32 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +#include <stdatomic.h>
> > > +#include <stdbool.h>
> > > +
> > > +bool
> > > +foo0 (_Atomic int *v)
> > > +{
> > > +#define BIT (1 << 0)
> > > +  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & 
> > > BIT);
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +foo30 (_Atomic int *v)
> > > +{
> > > +#define BIT (1 << 30)
> > > +  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & 
> > > BIT);
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +foo31 (_Atomic int *v)
> > > +{
> > > +#define BIT (1 << 31)
> > > +  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & 
> > > BIT);
> > > +#undef BIT
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 3 } } */
> > > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-9b.c 
> > > b/gcc/testsuite/gcc.target/i386/pr102566-9b.c
> > > new file mode 100644
> > > index 00000000000..38ddbdc630f
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr102566-9b.c
> > > @@ -0,0 +1,32 @@
> > > +/* { dg-do compile { target { ! ia32 } } } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +#include <stdatomic.h>
> > > +#include <stdbool.h>
> > > +
> > > +bool
> > > +foo0 (_Atomic long long *v)
> > > +{
> > > +#define BIT (1ll << 0)
> > > +  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & 
> > > BIT);
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +foo30 (_Atomic long long *v)
> > > +{
> > > +#define BIT (1ll << 62)
> > > +  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & 
> > > BIT);
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +foo31 (_Atomic long long *v)
> > > +{
> > > +#define BIT (1ll << 63)
> > > +  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & 
> > > BIT);
> > > +#undef BIT
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 3 } } */
> > > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > > diff --git a/gcc/tree-ssa-ccp.c b/gcc/tree-ssa-ccp.c
> > > index 70ce6a4d5b8..bb70b87aa5e 100644
> > > --- a/gcc/tree-ssa-ccp.c
> > > +++ b/gcc/tree-ssa-ccp.c
> > > @@ -3243,6 +3243,81 @@ optimize_unreachable (gimple_stmt_iterator i)
> > >    return ret;
> > >  }
> > >
> > > +/* Convert
> > > +   _1 = __atomic_fetch_or_* (ptr_6, 1, _3);
> > > +   _7 = ~_1;
> > > +   _5 = (_Bool) _7;
> > > +   to
> > > +   _1 = __atomic_fetch_or_* (ptr_6, 1, _3);
> > > +   _8 = _1 & 1;
> > > +   _5 = _8 == 0;
> > > +   and convert
> > > +   _1 = __atomic_fetch_and_* (ptr_6, ~1, _3);
> > > +   _7 = ~_1;
> > > +   _4 = (_Bool) _7;
> > > +   to
> > > +   _1 = __atomic_fetch_and_* (ptr_6, ~1, _3);
> > > +   _8 = _1 & 1;
> > > +   _4 = _8 == 0;
> > > +
> > > +   USE_STMT is the gimple statement which uses the return value of
> > > +   __atomic_fetch_or_*.  LHS is the return value of __atomic_fetch_or_*.
> > > +   MASK is the mask passed to __atomic_fetch_or_*.
> > > + */
> > > +
> > > +static gimple *
> > > +convert_atomic_bit_not (enum internal_fn fn, gimple *use_stmt,
> > > +                       tree lhs, tree mask)
> > > +{
> > > +  tree and_mask;
> > > +  if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> > > +    {
> > > +      /* MASK must be ~1.  */
> > > +      if (!operand_equal_p (build_int_cst (TREE_TYPE (lhs),
> > > +                                          ~HOST_WIDE_INT_1), mask, 0))
> > > +       return nullptr;
> > > +      and_mask = build_int_cst (TREE_TYPE (lhs), 1);
> > > +    }
> > > +  else
> > > +    {
> > > +      /* MASK must be 1.  */
> > > +      if (!operand_equal_p (build_int_cst (TREE_TYPE (lhs), 1), mask, 0))
> > > +       return nullptr;
> > > +      and_mask = mask;
> > > +    }
> > > +
> > > +  tree use_lhs = gimple_assign_lhs (use_stmt);
> > > +
> > > +  use_operand_p use_p;
> > > +  gimple *use_not_stmt;
> > > +
> > > +  if (!single_imm_use (use_lhs, &use_p, &use_not_stmt)
> > > +      || !is_gimple_assign (use_not_stmt))
> > > +    return nullptr;
> > > +
> > > +  if (gimple_assign_rhs_code (use_not_stmt) != NOP_EXPR)
> > > +    return nullptr;
> > > +
> > > +  tree use_not_lhs = gimple_assign_lhs (use_not_stmt);
> > > +  if (TREE_CODE (TREE_TYPE (use_not_lhs)) != BOOLEAN_TYPE)
> > > +    return nullptr;
> > > +
> > > +  gimple_stmt_iterator gsi;
> > > +  gsi = gsi_for_stmt (use_stmt);
> > > +  gsi_remove (&gsi, true);
> > > +  tree var = make_ssa_name (TREE_TYPE (lhs));
> > > +  use_stmt = gimple_build_assign (var, BIT_AND_EXPR, lhs, and_mask);
> > > +  gsi = gsi_for_stmt (use_not_stmt);
> > > +  gsi_insert_before (&gsi, use_stmt, GSI_NEW_STMT);
> > > +  lhs = gimple_assign_lhs (use_not_stmt);
> > > +  gimple *g = gimple_build_assign (lhs, EQ_EXPR, var,
> > > +                                  build_zero_cst (TREE_TYPE (mask)));
> > > +  gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> > > +  gsi = gsi_for_stmt (use_not_stmt);
> > > +  gsi_remove (&gsi, true);
> > > +  return use_stmt;
> > > +}
> > > +
> > >  /* Optimize
> > >       mask_2 = 1 << cnt_1;
> > >       _4 = __atomic_fetch_or_* (ptr_6, mask_2, _3);
> > > @@ -3269,7 +3344,7 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator 
> > > *gsip,
> > >    tree lhs = gimple_call_lhs (call);
> > >    use_operand_p use_p;
> > >    gimple *use_stmt;
> > > -  tree mask, bit;
> > > +  tree mask;
> > >    optab optab;
> > >
> > >    if (!flag_inline_atomics
> > > @@ -3279,10 +3354,317 @@ optimize_atomic_bit_test_and 
> > > (gimple_stmt_iterator *gsip,
> > >        || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
> > >        || !single_imm_use (lhs, &use_p, &use_stmt)
> > >        || !is_gimple_assign (use_stmt)
> > > -      || gimple_assign_rhs_code (use_stmt) != BIT_AND_EXPR
> > >        || !gimple_vdef (call))
> > >      return;
> > >
> > > +  tree bit = nullptr;
> > > +
> > > +  mask = gimple_call_arg (call, 1);
> > > +  tree_code rhs_code = gimple_assign_rhs_code (use_stmt);
> > > +  if (rhs_code != BIT_AND_EXPR)
> > > +    {
> > > +      if (rhs_code != NOP_EXPR && rhs_code != BIT_NOT_EXPR)
> > > +       return;
> > > +
> > > +      tree use_lhs = gimple_assign_lhs (use_stmt);
> > > +      if (TREE_CODE (use_lhs) == SSA_NAME
> > > +         && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (use_lhs))
> > > +       return;
> > > +
> > > +      tree use_rhs = gimple_assign_rhs1 (use_stmt);
> > > +      if (lhs != use_rhs)
> > > +       return;
> > > +
> > > +      gimple *g;
> > > +      gimple_stmt_iterator gsi;
> > > +      tree var;
> > > +      int ibit = -1;
> > > +
> > > +      if (rhs_code == BIT_NOT_EXPR)
> > > +       {
> > > +         g = convert_atomic_bit_not (fn, use_stmt, lhs, mask);
> > > +         if (!g)
> > > +           return;
> > > +         use_stmt = g;
> > > +         ibit = 0;
> > > +       }
> > > +      else if (TREE_CODE (TREE_TYPE (use_lhs)) == BOOLEAN_TYPE)
> > > +       {
> > > +         tree and_mask;
> > > +         if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> > > +           {
> > > +             /* MASK must be ~1.  */
> > > +             if (!operand_equal_p (build_int_cst (TREE_TYPE (lhs),
> > > +                                                  ~HOST_WIDE_INT_1),
> > > +                                   mask, 0))
> > > +               return;
> > > +
> > > +             /* Convert
> > > +                _1 = __atomic_fetch_and_* (ptr_6, ~1, _3);
> > > +                _4 = (_Bool) _1;
> > > +                to
> > > +                _1 = __atomic_fetch_and_* (ptr_6, ~1, _3);
> > > +                _5 = _1 & 1;
> > > +                _4 = (_Bool) _5;
> > > +              */
> > > +             and_mask = build_int_cst (TREE_TYPE (lhs), 1);
> > > +           }
> > > +         else
> > > +           {
> > > +             and_mask = build_int_cst (TREE_TYPE (lhs), 1);
> > > +             if (!operand_equal_p (and_mask, mask, 0))
> > > +               return;
> > > +
> > > +             /* Convert
> > > +                _1 = __atomic_fetch_or_* (ptr_6, 1, _3);
> > > +                _4 = (_Bool) _1;
> > > +                to
> > > +                _1 = __atomic_fetch_or_* (ptr_6, 1, _3);
> > > +                _5 = _1 & 1;
> > > +                _4 = (_Bool) _5;
> > > +              */
> > > +           }
> > > +         var = make_ssa_name (TREE_TYPE (use_rhs));
> > > +         replace_uses_by (use_rhs, var);
> > > +         g = gimple_build_assign (var, BIT_AND_EXPR, use_rhs,
> > > +                                  and_mask);
> > > +         gsi = gsi_for_stmt (use_stmt);
> > > +         gsi_insert_before (&gsi, g, GSI_NEW_STMT);
> > > +         use_stmt = g;
> > > +         ibit = 0;
> > > +       }
> > > +      else if (TYPE_PRECISION (TREE_TYPE (use_lhs))
> > > +              == TYPE_PRECISION (TREE_TYPE (use_rhs)))
> > > +       {
> > > +         gimple *use_nop_stmt;
> > > +         if (!single_imm_use (use_lhs, &use_p, &use_nop_stmt)
> > > +             || !is_gimple_assign (use_nop_stmt))
> > > +           return;
> > > +         rhs_code = gimple_assign_rhs_code (use_nop_stmt);
> > > +         if (rhs_code != BIT_AND_EXPR)
> > > +           {
> > > +             tree use_nop_lhs = gimple_assign_lhs (use_nop_stmt);
> > > +             if (TREE_CODE (use_nop_lhs) == SSA_NAME
> > > +                 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (use_nop_lhs))
> > > +               return;
> > > +             if (rhs_code == BIT_NOT_EXPR)
> > > +               {
> > > +                 g = convert_atomic_bit_not (fn, use_nop_stmt, lhs,
> > > +                                             mask);
> > > +                 if (!g)
> > > +                   return;
> > > +                 /* Convert
> > > +                    _1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
> > > +                    _2 = (int) _1;
> > > +                    _7 = ~_2;
> > > +                    _5 = (_Bool) _7;
> > > +                    to
> > > +                    _1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
> > > +                    _8 = _1 & 1;
> > > +                    _5 = _8 == 0;
> > > +                    and convert
> > > +                    _1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
> > > +                    _2 = (int) _1;
> > > +                    _7 = ~_2;
> > > +                    _5 = (_Bool) _7;
> > > +                    to
> > > +                    _1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
> > > +                    _8 = _1 & 1;
> > > +                    _5 = _8 == 0;
> > > +                  */
> > > +                 gsi = gsi_for_stmt (use_stmt);
> > > +                 gsi_remove (&gsi, true);
> > > +                 use_stmt = g;
> > > +                 ibit = 0;
> > > +               }
> > > +             else
> > > +               {
> > > +                 if (TREE_CODE (TREE_TYPE (use_nop_lhs)) != BOOLEAN_TYPE)
> > > +                   return;
> > > +                 if (rhs_code != GE_EXPR && rhs_code != LT_EXPR)
> > > +                   return;
> > > +                 tree cmp_rhs1 = gimple_assign_rhs1 (use_nop_stmt);
> > > +                 if (use_lhs != cmp_rhs1)
> > > +                   return;
> > > +                 tree cmp_rhs2 = gimple_assign_rhs2 (use_nop_stmt);
> > > +                 if (!integer_zerop (cmp_rhs2))
> > > +                   return;
> > > +
> > > +                 tree and_mask;
> > > +
> > > +                 unsigned HOST_WIDE_INT bytes
> > > +                   = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (use_rhs)));
> > > +                 ibit = bytes * BITS_PER_UNIT - 1;
> > > +                 unsigned HOST_WIDE_INT highest
> > > +                   = HOST_WIDE_INT_1U << ibit;
> > > +
> > > +                 if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> > > +                   {
> > > +                     /* Get the signed maximum of the USE_RHS type.  */
> > > +                     and_mask = build_int_cst (TREE_TYPE (use_rhs),
> > > +                                               highest - 1);
> > > +                     if (!operand_equal_p (and_mask, mask, 0))
> > > +                       return;
> > > +
> > > +                     /* Convert
> > > +                        _1 = __atomic_fetch_and_4 (ptr_6, 0x7fffffff, 
> > > _3);
> > > +                        _5 = (signed int) _1;
> > > +                        _4 = _5 < 0 or _5 >= 0;
> > > +                        to
> > > +                        _1 = __atomic_fetch_and_4 (ptr_6, 0x7fffffff, 
> > > _3);
> > > +                        _6 = _1 & 0x80000000;
> > > +                        _4 = _6 != 0 or _6 == 0;
> > > +                      */
> > > +                     and_mask = build_int_cst (TREE_TYPE (use_rhs),
> > > +                                               highest);
> > > +                   }
> > > +                 else
> > > +                   {
> > > +                     /* Get the signed minimum of the USE_RHS type.  */
> > > +                     and_mask = build_int_cst (TREE_TYPE (use_rhs),
> > > +                                               highest);
> > > +                     if (!operand_equal_p (and_mask, mask, 0))
> > > +                       return;
> > > +
> > > +                     /* Convert
> > > +                        _1 = __atomic_fetch_or_4 (ptr_6, 0x80000000, _3);
> > > +                        _5 = (signed int) _1;
> > > +                        _4 = _5 < 0 or _5 >= 0;
> > > +                        to
> > > +                        _1 = __atomic_fetch_or_4 (ptr_6, 0x80000000, _3);
> > > +                        _6 = _1 & 0x80000000;
> > > +                        _4 = _6 != 0 or _6 == 0;
> > > +                      */
> > > +                   }
> > > +                 var = make_ssa_name (TREE_TYPE (use_rhs));
> > > +                 gsi = gsi_for_stmt (use_stmt);
> > > +                 gsi_remove (&gsi, true);
> > > +                 g = gimple_build_assign (var, BIT_AND_EXPR, use_rhs,
> > > +                                          and_mask);
> > > +                 gsi = gsi_for_stmt (use_nop_stmt);
> > > +                 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
> > > +                 use_stmt = g;
> > > +                 g = gimple_build_assign (use_nop_lhs,
> > > +                                          (rhs_code == GE_EXPR
> > > +                                           ? EQ_EXPR : NE_EXPR),
> > > +                                          var,
> > > +                                          build_zero_cst (TREE_TYPE 
> > > (use_rhs)));
> > > +                 gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> > > +                 gsi = gsi_for_stmt (use_nop_stmt);
> > > +                 gsi_remove (&gsi, true);
> > > +               }
> > > +           }
> > > +         else
> > > +           {
> > > +             tree op_mask = mask;
> > > +             tree check_mask = op_mask;
> > > +             if (TREE_CODE (op_mask) == SSA_NAME)
> > > +               {
> > > +                 g = SSA_NAME_DEF_STMT (op_mask);
> > > +                 if (!is_gimple_assign (g))
> > > +                   return;
> > > +                 if (gimple_assign_rhs_code (g) == NOP_EXPR)
> > > +                   {
> > > +                     tree mask_nop_lhs = gimple_assign_lhs (g);
> > > +
> > > +                     if (TREE_CODE (mask_nop_lhs) == SSA_NAME
> > > +                         && SSA_NAME_OCCURS_IN_ABNORMAL_PHI 
> > > (mask_nop_lhs))
> > > +                       return;
> > > +
> > > +                     tree mask_nop_rhs = gimple_assign_rhs1 (g);
> > > +                     if (TYPE_PRECISION (TREE_TYPE (mask_nop_lhs))
> > > +                         != TYPE_PRECISION (TREE_TYPE (mask_nop_rhs)))
> > > +                       return;
> > > +                     op_mask = mask_nop_rhs;
> > > +                     check_mask = op_mask;
> > > +                     g = SSA_NAME_DEF_STMT (op_mask);
> > > +                     if (!is_gimple_assign (g))
> > > +                       return;
> > > +                   }
> > > +
> > > +                 if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> > > +                   {
> > > +                     if (gimple_assign_rhs_code (g) != BIT_NOT_EXPR)
> > > +                       return;
> > > +                     check_mask = gimple_assign_rhs1 (g);
> > > +                     if (TREE_CODE (check_mask) != SSA_NAME)
> > > +                       return;
> > > +                     g = SSA_NAME_DEF_STMT (check_mask);
> > > +                     if (!is_gimple_assign (g))
> > > +                       return;
> > > +                   }
> > > +
> > > +                 if (gimple_assign_rhs_code (g) != LSHIFT_EXPR
> > > +                     || !integer_onep (gimple_assign_rhs1 (g)))
> > > +                   return;
> > > +
> > > +                 bit = gimple_assign_rhs2 (g);
> > > +               }
> > > +
> > > +             if (TREE_CODE (check_mask) == INTEGER_CST)
> > > +               {
> > > +                 if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> > > +                   check_mask = const_unop (BIT_NOT_EXPR,
> > > +                                            TREE_TYPE (check_mask),
> > > +                                            check_mask);
> > > +                 check_mask = fold_convert (TREE_TYPE (lhs),
> > > +                                            check_mask);
> > > +                 /* Check if CHECK_MASK is a power of two.  */
> > > +                 ibit = tree_log2 (check_mask);
> > > +                 if (ibit < 0)
> > > +                   return;
> > > +               }
> > > +
> > > +             tree use_nop_rhs1 = gimple_assign_rhs1 (use_nop_stmt);
> > > +             tree use_nop_rhs2 = gimple_assign_rhs2 (use_nop_stmt);
> > > +             if (!operand_equal_p (use_nop_rhs1, check_mask, 0)
> > > +                 && !operand_equal_p (use_nop_rhs2, check_mask, 0))
> > > +               return;
> > > +
> > > +             /* Convert
> > > +                _1 = __atomic_fetch_or_4 (ptr_6, mask, _3);
> > > +                _2 = (int) _1;
> > > +                _5 = _2 & mask;
> >
> > (***)
> >
> > > +                to
> > > +                _1 = __atomic_fetch_or_4 (ptr_6, mask, _3);
> > > +                _6 = _1 & mask;
> > > +                _5 = (int) _6;
> > > +                and convert
> > > +                _1 = ~mask_7;
> > > +                _2 = (unsigned int) _1;
> > > +                _3 = __atomic_fetch_and_4 (ptr_6, _2, 0);
> > > +                _4 = (int) _3;
> > > +                _5 = _4 & mask_7;
> > > +                to
> > > +                _1 = __atomic_fetch_and_* (ptr_6, ~mask_7, _3);
> > > +                _12 = _3 & mask_7;
> > > +                _5 = (int) _12;
> > > +              */
> >
> > I wonder if it would be better to maintain the matching part in match.pd
> I'm trying to rewrite the matching part in match.pd and find that the
> canonicalization is OK when the mask is constant, but not when it is
> variable, since it will be simplified back by

Note I didn't suggest to use (simplify (....)) but instead use

(match (...))

you can look at the ctz_table_index example and how it is used from
tree-ssa-forwprop.c as gimple_ctz_table_index ().  That way you
can replace the boilerplate for matching expressions.  You can
match multiple related forms (when the "leaves" have the same structure)
with multiple (match ...) instances of the same name; see for example
'nop_convert'.

>  /* In GIMPLE, getting rid of 2 conversions for one new results
>     in smaller IL.  */
>  (simplify
>   (convert (bitop:cs@2 (nop_convert:s @0) @1))
>   (if (GIMPLE
>        && TREE_CODE (@1) != INTEGER_CST
>        && tree_nop_conversion_p (type, TREE_TYPE (@2))
>        && types_match (type, @0))
>    (bitop @0 (convert @1)))))
>
> The canonicalization for variables is like
>
> convert
>   _1 = ~mask_7;
>   _2 = (unsigned int) _1;
>   _3 = __atomic_fetch_and_4 (ptr_6, _2, 0);
>  _4 = (int) _3;
>  _5 = _4 & mask_7;
>
> to
>   _1 = ~mask_7;
>   _2 = (unsigned int) _1;
>   _3 = __atomic_fetch_and_4 (ptr_6, _2, 0);
>   _4 = (unsigned int) mask_7
>   _6 = _3 & _4
>   _5 = (int) _6
>
> and be simplified back.
>
> I've also tried another way of simplification like
>
> convert
>   _1 = ~mask_7;
>   _2 = (unsigned int) _1;
>   _3 = __atomic_fetch_and_4 (ptr_6, _2, 0);
>  _4 = (int) _3;
>  _5 = _4 & mask_7;
>
> to
>   _1 = (unsigned int)mask_7;
>   _2 = ~ _1;
>   _3 = __atomic_fetch_and_4 (ptr_6, _2, 0);
>    _6 = _3 & _1
>   _5 = (int) _6
>
> but it's prevented by the code below, since __atomic_fetch_and_4 is not
> CONST but we need to regenerate it with the updated parameter.
>
>   /* We can't and should not emit calls to non-const functions.  */
>   if (!(flags_from_decl_or_type (decl) & ECF_CONST))
>     return NULL;
>
> >
> > there you could have
> >
> > (match (atomic_fetch_mask @1 @2 @3 @mask)
> >  (bit_and (convert (IFN_ATOMIC_BIT_TEST_AND_RESET @2 @mask @3)) @mask))
> >
> > and here in this code do
> >
> > extern bool gimple_atomic_fetch_mask (tree t, tree *res_ops, tree 
> > (*)(tree));
> >
> > and call it on the _5 from (***) where the function will return true if it
> > matched and it will set res_ops[] with the positional operands @1 @2
> > @3 and @mask.
> >
> > You can add variants and conditions to the same match entry, see match.pd
> > for examples and also match-and-simplify.texi
> >
> > > +             replace_uses_by (use_lhs, lhs);
> > > +             tree use_nop_lhs = gimple_assign_lhs (use_nop_stmt);
> > > +             var = make_ssa_name (TREE_TYPE (use_nop_lhs));
> > > +             gimple_assign_set_lhs (use_nop_stmt, var);
> > > +             gsi = gsi_for_stmt (use_stmt);
> > > +             gsi_remove (&gsi, true);
> > > +             release_defs (use_stmt);
> > > +             gsi_remove (gsip, true);
> > > +             var = build1 (NOP_EXPR, TREE_TYPE (use_nop_lhs), var);
> >
> > instead of building a GENERIC NOP you could use the
> >
> > gassign *gimple_build_assign (tree, enum tree_code, tree CXX_MEM_STAT_INFO);
> >
> > overload.
> >
> > > +             gsi = gsi_for_stmt (use_nop_stmt);
> > > +             g = gimple_build_assign (use_nop_lhs, var);
> > > +             gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> > > +             use_stmt = use_nop_stmt;
> > > +             mask = op_mask;
> > > +           }
> > > +       }
> > > +
> > > +      if (!bit)
> > > +       {
> > > +         if (ibit < 0)
> > > +           gcc_unreachable ();
> > > +         bit = build_int_cst (TREE_TYPE (lhs), ibit);
> > > +       }
> > > +    }
> > > +
> > >    switch (fn)
> > >      {
> > >      case IFN_ATOMIC_BIT_TEST_AND_SET:
> > > @@ -3301,51 +3683,76 @@ optimize_atomic_bit_test_and 
> > > (gimple_stmt_iterator *gsip,
> > >    if (optab_handler (optab, TYPE_MODE (TREE_TYPE (lhs))) == 
> > > CODE_FOR_nothing)
> > >      return;
> > >
> > > -  mask = gimple_call_arg (call, 1);
> > >    tree use_lhs = gimple_assign_lhs (use_stmt);
> > >    if (!use_lhs)
> > >      return;
> > >
> > > -  if (TREE_CODE (mask) == INTEGER_CST)
> > > +  if (!bit)
> > >      {
> > > -      if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> > > -       mask = const_unop (BIT_NOT_EXPR, TREE_TYPE (mask), mask);
> > > -      mask = fold_convert (TREE_TYPE (lhs), mask);
> > > -      int ibit = tree_log2 (mask);
> > > -      if (ibit < 0)
> > > -       return;
> > > -      bit = build_int_cst (TREE_TYPE (lhs), ibit);
> > > -    }
> > > -  else if (TREE_CODE (mask) == SSA_NAME)
> > > -    {
> > > -      gimple *g = SSA_NAME_DEF_STMT (mask);
> > > -      if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> > > +      if (TREE_CODE (mask) == INTEGER_CST)
> > >         {
> > > -         if (!is_gimple_assign (g)
> > > -             || gimple_assign_rhs_code (g) != BIT_NOT_EXPR)
> > > +         if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> > > +           mask = const_unop (BIT_NOT_EXPR, TREE_TYPE (mask), mask);
> > > +         mask = fold_convert (TREE_TYPE (lhs), mask);
> > > +         int ibit = tree_log2 (mask);
> > > +         if (ibit < 0)
> > > +           return;
> > > +         bit = build_int_cst (TREE_TYPE (lhs), ibit);
> > > +       }
> > > +      else if (TREE_CODE (mask) == SSA_NAME)
> > > +       {
> > > +         gimple *g = SSA_NAME_DEF_STMT (mask);
> > > +         if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> > > +           {
> > > +             if (!is_gimple_assign (g)
> > > +                 || gimple_assign_rhs_code (g) != BIT_NOT_EXPR)
> > > +               return;
> > > +             mask = gimple_assign_rhs1 (g);
> > > +             if (TREE_CODE (mask) != SSA_NAME)
> > > +               return;
> > > +             g = SSA_NAME_DEF_STMT (mask);
> > > +           }
> > > +         if (!is_gimple_assign (g))
> > >             return;
> > > -         mask = gimple_assign_rhs1 (g);
> > > -         if (TREE_CODE (mask) != SSA_NAME)
> > > +         rhs_code = gimple_assign_rhs_code (g);
> > > +         if (rhs_code != LSHIFT_EXPR)
> > > +           {
> > > +             if (rhs_code != NOP_EXPR)
> > > +               return;
> > > +
> > > +             /* Handle
> > > +                _1 = 1 << bit_4(D);
> > > +                mask_5 = (unsigned int) _1;
> > > +                _2 = __atomic_fetch_or_4 (v_7(D), mask_5, 0);
> > > +                _3 = _2 & mask_5;
> > > +                */
> > > +             tree nop_lhs = gimple_assign_lhs (g);
> > > +             tree nop_rhs = gimple_assign_rhs1 (g);
> > > +             if (TYPE_PRECISION (TREE_TYPE (nop_lhs))
> > > +                 != TYPE_PRECISION (TREE_TYPE (nop_rhs)))
> > > +               return;
> > > +             g = SSA_NAME_DEF_STMT (nop_rhs);
> > > +             if (!is_gimple_assign (g)
> > > +                 || gimple_assign_rhs_code (g) != LSHIFT_EXPR)
> > > +               return;
> > > +           }
> > > +         if (!integer_onep (gimple_assign_rhs1 (g)))
> > >             return;
> > > -         g = SSA_NAME_DEF_STMT (mask);
> > > +         bit = gimple_assign_rhs2 (g);
> > >         }
> > > -      if (!is_gimple_assign (g)
> > > -         || gimple_assign_rhs_code (g) != LSHIFT_EXPR
> > > -         || !integer_onep (gimple_assign_rhs1 (g)))
> > > +      else
> > >         return;
> > > -      bit = gimple_assign_rhs2 (g);
> > > -    }
> > > -  else
> > > -    return;
> > >
> > > -  if (gimple_assign_rhs1 (use_stmt) == lhs)
> > > -    {
> > > -      if (!operand_equal_p (gimple_assign_rhs2 (use_stmt), mask, 0))
> > > +      if (gimple_assign_rhs1 (use_stmt) == lhs)
> > > +       {
> > > +         if (!operand_equal_p (gimple_assign_rhs2 (use_stmt), mask, 0))
> > > +           return;
> > > +       }
> > > +      else if (gimple_assign_rhs2 (use_stmt) != lhs
> > > +              || !operand_equal_p (gimple_assign_rhs1 (use_stmt),
> > > +                                   mask, 0))
> > >         return;
> > >      }
> > > -  else if (gimple_assign_rhs2 (use_stmt) != lhs
> > > -          || !operand_equal_p (gimple_assign_rhs1 (use_stmt), mask, 0))
> > > -    return;
> > >
> > >    bool use_bool = true;
> > >    bool has_debug_uses = false;
> > > @@ -3434,18 +3841,40 @@ optimize_atomic_bit_test_and 
> > > (gimple_stmt_iterator *gsip,
> > >          of the specified bit after the atomic operation (makes only sense
> > >          for xor, otherwise the bit content is compile time known),
> > >          we need to invert the bit.  */
> > > +      tree mask_convert = mask;
> > > +      gimple *g_convert = nullptr;
> > > +      if (!use_bool && TREE_TYPE (lhs) != TREE_TYPE (mask))
> > > +       {
> > > +         mask_convert = make_ssa_name (TREE_TYPE (lhs));
> > > +         tree var = build1 (NOP_EXPR, TREE_TYPE (lhs), mask);
> > > +         g_convert = gimple_build_assign (mask_convert, var);
> > > +       }
> > >        g = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
> > >                                BIT_XOR_EXPR, new_lhs,
> > >                                use_bool ? build_int_cst (TREE_TYPE (lhs), 
> > > 1)
> > > -                                       : mask);
> > > +                                       : mask_convert);
> > >        new_lhs = gimple_assign_lhs (g);
> >
> > You could use
> >
> >         gimple_seq stmts = NULL;
> >         mask_convert = gimple_convert (&stmts, TREE_TYPE (lhs), mask);
> >         new_lhs = gimple_build (&stmts, BIT_XOR_EXPR, TREE_TYPE (lhs), 
> > new_lhs,
> >                                                use_bool ?
> > build_int_cst (TREE_TYPE (lhs), 1) : mask_convert);
> >
> > >        if (throws)
> > >         {
> > > -         gsi_insert_on_edge_immediate (e, g);
> >
> > gsi_insert_seq_on_edge_immediate (e, stmts);
> >
> > to simplify this.  The conversion will be only generated if necessary.
> >
> > > +         if (g_convert)
> > > +           {
> > > +             gsi_insert_on_edge_immediate (e, g_convert);
> > > +             gsi = gsi_for_stmt (g_convert);
> > > +             gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> > > +           }
> > > +         else
> > > +           gsi_insert_on_edge_immediate (e, g);
> > >           gsi = gsi_for_stmt (g);
> > >         }
> > >        else
> > > -       gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> > > +       {
> > > +         if (g_convert)
> > > +           {
> > > +             gsi_insert_after (&gsi, g_convert, GSI_NEW_STMT);
> > > +             gsi = gsi_for_stmt (g_convert);
> > > +           }
> > > +         gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> > > +       }
> > >      }
> > >    if (use_bool && has_debug_uses)
> > >      {
> > > --
> > > 2.31.1
> > >
>
>
>
> --
> BR,
> Hongtao

Reply via email to