[PATCH 4/4] stdbit: clean up namespace and simplify

Paul Eggert Sat, 11 May 2024 12:09:58 -0700

Fix namespace pollution in substitute stdbit.h.
Clean up and simplify some of the non-GCC code, by preferring
inline functions to macros and substituting something
more straightforward than a de Bruijn hash (possibly faster?).
The non-GCC non-C23 substitutes should all compile to
branch-free code, if the compiler is good.
* lib/stdbit.c (COUNT_LEADING_ZEROS_INLINE)
(COUNT_TRAILING_ZEROS_INLINE, COUNT_ONE_BITS_INLINE): Remove.
(__gl_stdbit_clztab) [!_GL_STDBIT_HAS_BUILTIN_CLZ && !_MSC_VER]:
New constant array.
(__gl_stdbit_popcount_support): Adjust to stdbit.in.h changes.
* lib/stdbit.in.h: Do not include <limits.h> or <stdlib.h>.
Check that bytes are 8 bits.
(COUNT_LEADING_ZEROS_INLINE, COUNT_TRAILING_ZEROS_INLINE)
(COUNT_ONE_BITS_INLINE, COUNT_LEADING_ZEROS)
(count_leading_zeros_32, count_leading_zeros)
(count_leading_zeros_l, count_leading_zeros_ll)
(COUNT_TRAILING_ZEROS, count_trailing_zeros_32)
(count_trailing_zeros, count_trailing_zeros_l)
(count_trailing_zeros_ll, COUNT_ONE_BITS, count_one_bits_32)
(COUNT_ONE_BITS_GENERIC, count_one_bits, count_one_bits_l)
(count_one_bits_ll): Remove, replacing all uses with ...
(_GL_STDBIT_HAS_BUILTIN_CLZ)
(_GL_STDBIT_HAS_BUILTIN_CTZ, _GL_STDBIT_HAS_BUILTIN_POPCOUNT)
(__gl_stdbit_clz, __gl_stdbit_clzl, __gl_stdbit_clzll)
(__gl_stdbit_ctz, __gl_stdbit_ctzl, __gl_stdbit_ctzll)
(__gl_stdbit_popcount, __gl_stdbit_popcountl, __gl_stdbit_popcountll)
(__gl_stdbit_popcount255): ... these new functions and macros.
(__popcnt64): Omit unnecessary casts.
(__gl_stdbit_popcount_support): Rename from popcount_support
and make it a signed char since that’s all we need.
(__gl_stdbit_popcount_supported): Rename from popcount_supported.
All uses changed.
* modules/stdbit (Depends-on): Add assert-h, for static_assert.
---
 ChangeLog       |  36 ++++
 lib/stdbit.c    |  50 ++++-
 lib/stdbit.in.h | 553 +++++++++++++++++++-----------------------------
 modules/stdbit  |   1 +
 4 files changed, 298 insertions(+), 342 deletions(-)


diff --git a/ChangeLog b/ChangeLog
index b0e7efd02c..b4319fc47b 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,41 @@
 2024-05-11  Paul Eggert  <egg...@cs.ucla.edu>
 
+       stdbit: clean up namespace and simplify
+       Fix namespace pollution in substitute stdbit.h.
+       Clean up and simplify some of the non-GCC code, by preferring
+       inline functions to macros and substituting something
+       more straightforward than a de Bruijn hash (possibly faster?).
+       The non-GCC non-C23 substitutes should all compile to
+       branch-free code, if the compiler is good.
+       * lib/stdbit.c (COUNT_LEADING_ZEROS_INLINE)
+       (COUNT_TRAILING_ZEROS_INLINE, COUNT_ONE_BITS_INLINE): Remove.
+       (__gl_stdbit_clztab) [!_GL_STDBIT_HAS_BUILTIN_CLZ && !_MSC_VER]:
+       New constant array.
+       (__gl_stdbit_popcount_support): Adjust to stdbit.in.h changes.
+       * lib/stdbit.in.h: Do not include <limits.h> or <stdlib.h>.
+       Check that bytes are 8 bits.
+       (COUNT_LEADING_ZEROS_INLINE, COUNT_TRAILING_ZEROS_INLINE)
+       (COUNT_ONE_BITS_INLINE, COUNT_LEADING_ZEROS)
+       (count_leading_zeros_32, count_leading_zeros)
+       (count_leading_zeros_l, count_leading_zeros_ll)
+       (COUNT_TRAILING_ZEROS, count_trailing_zeros_32)
+       (count_trailing_zeros, count_trailing_zeros_l)
+       (count_trailing_zeros_ll, COUNT_ONE_BITS, count_one_bits_32)
+       (COUNT_ONE_BITS_GENERIC, count_one_bits, count_one_bits_l)
+       (count_one_bits_ll): Remove, replacing all uses with ...
+       (_GL_STDBIT_HAS_BUILTIN_CLZ)
+       (_GL_STDBIT_HAS_BUILTIN_CTZ, _GL_STDBIT_HAS_BUILTIN_POPCOUNT)
+       (__gl_stdbit_clz, __gl_stdbit_clzl, __gl_stdbit_clzll)
+       (__gl_stdbit_ctz, __gl_stdbit_ctzl, __gl_stdbit_ctzll)
+       (__gl_stdbit_popcount, __gl_stdbit_popcountl, __gl_stdbit_popcountll)
+       (__gl_stdbit_popcount255): ... these new functions and macros.
+       (__popcnt64): Omit unnecessary casts.
+       (__gl_stdbit_popcount_support): Rename from popcount_support
+       and make it a signed char since that’s all we need.
+       (__gl_stdbit_popcount_supported): Rename from popcount_supported.
+       All uses changed.
+       * modules/stdbit (Depends-on): Add assert-h, for static_assert.
+
        stdbit: remove most module dependence
        Remove dependence of stdbit on the modules count-leading-zeros,
        count-trailing-zeros, and count-one-bits.  stdbit is part of C23
diff --git a/lib/stdbit.c b/lib/stdbit.c
index 2a5626a902..f2a51b10f7 100644
--- a/lib/stdbit.c
+++ b/lib/stdbit.c
@@ -1,9 +1,51 @@
+/* Support C23 bit and byte utilities on non-C23 platforms.
+
+   Copyright 2024 Free Software Foundation, Inc.
+
+   This file is free software: you can redistribute it and/or modify
+   it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   This file is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
+
+/* Written by Paul Eggert.  */
+
 #include <config.h>
+
 #define _GL_STDBIT_INLINE _GL_EXTERN_INLINE
-#define COUNT_LEADING_ZEROS_INLINE _GL_EXTERN_INLINE
-#define COUNT_TRAILING_ZEROS_INLINE _GL_EXTERN_INLINE
-#define COUNT_ONE_BITS_INLINE _GL_EXTERN_INLINE
 #include <stdbit.h>
+
+#if !defined _GL_STDBIT_HAS_BUILTIN_CLZ && !_MSC_VER
+/* __gl_stdbit_clztab[B] is the number of leading zeros in
+   the 8-bit byte with value B.  */
+char const __gl_stdbit_clztab[256] =
+  {
+    8,
+    7,
+    6, 6,
+    5, 5, 5, 5,
+    4, 4, 4, 4, 4, 4, 4, 4,
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+
+    /* The rest is zero.  */
+  };
+#endif
+
 #if 1500 <= _MSC_VER && (defined _M_IX86 || defined _M_X64)
-int popcount_support = -1;
+signed char __gl_stdbit_popcount_support;
 #endif
diff --git a/lib/stdbit.in.h b/lib/stdbit.in.h
index 63be4737a8..ed68bd6513 100644
--- a/lib/stdbit.in.h
+++ b/lib/stdbit.in.h
@@ -25,400 +25,277 @@
  #error "Please include config.h first."
 #endif
 
-/* Taken from count-leading-zeros.h.  */
-#include <limits.h>
-#include <stdlib.h>
-
 _GL_INLINE_HEADER_BEGIN
-#ifndef COUNT_LEADING_ZEROS_INLINE
-# define COUNT_LEADING_ZEROS_INLINE _GL_INLINE
+#ifndef _GL_STDBIT_INLINE
+# define _GL_STDBIT_INLINE _GL_INLINE
 #endif
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-/* Assuming the GCC builtin is BUILTIN and the MSC builtin is MSC_BUILTIN,
-   expand to code that computes the number of leading zeros of the local
-   variable 'x' of type TYPE (an unsigned integer type) and return it
-   from the current function.  */
-#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) \
-    || (__clang_major__ >= 4)
-# define COUNT_LEADING_ZEROS(BUILTIN, MSC_BUILTIN, TYPE)                \
-  return x ? BUILTIN (x) : CHAR_BIT * sizeof x;
-#elif _MSC_VER
-# pragma intrinsic (_BitScanReverse)
-# if defined _M_X64
-#  pragma intrinsic (_BitScanReverse64)
-# endif
-# define COUNT_LEADING_ZEROS(BUILTIN, MSC_BUILTIN, TYPE)                \
-    do                                                                  \
-      {                                                                 \
-        unsigned long result;                                           \
-        if (MSC_BUILTIN (&result, x))                                   \
-          return CHAR_BIT * sizeof x - 1 - result;                      \
-        return CHAR_BIT * sizeof x;                                     \
-      }                                                                 \
-    while (0)
+/* Bytes must be 8 bits, as POSIX requires.  This implementation uses
+   8 instead of CHAR_BIT to avoid namespace pollution.  */
+static_assert ((unsigned char) -1 == (1 << 8) - 1);
+
+/* An expression, preferably with the type of A, that has the value of B.  */
+#if ((defined __GNUC__ && 2 <= __GNUC__) \
+     || (defined __clang_major__ && 4 <= __clang_major__) \
+     || (defined __IBMC__ && 1210 <= __IBMC__ && defined __IBM__TYPEOF__) \
+     || (defined __SUNPRO_C && 0x5110 <= __SUNPRO_C && !__STDC__))
+# define _GL_STDBIT_TYPEOF_CAST(a, b) ((__typeof__ (a)) (b))
+#elif 202311 <= __STDC_VERSION__
+# define _GL_STDBIT_TYPEOF_CAST(a, b) ((typeof (a)) (b))
 #else
-# define COUNT_LEADING_ZEROS(BUILTIN, MSC_BUILTIN, TYPE)                \
-    do                                                                  \
-      {                                                                 \
-        int count;                                                      \
-        unsigned int leading_32;                                        \
-        if (! x)                                                        \
-          return CHAR_BIT * sizeof x;                                   \
-        for (count = 0;                                                 \
-             (leading_32 = ((x >> (sizeof (TYPE) * CHAR_BIT - 32))      \
-                            & 0xffffffffU),                             \
-              count < CHAR_BIT * sizeof x - 32 && !leading_32);         \
-             count += 32)                                               \
-          x = x << 31 << 1;                                             \
-        return count + count_leading_zeros_32 (leading_32);             \
-      }                                                                 \
-    while (0)
-
-/* Compute and return the number of leading zeros in X,
-   where 0 < X < 2**32.  */
-COUNT_LEADING_ZEROS_INLINE int
-count_leading_zeros_32 (unsigned int x)
-{
-  /* <https://github.com/gibsjose/BitHacks>
-     <https://www.fit.vutbr.cz/~ibarina/pub/bithacks.pdf> */
-  static const char de_Bruijn_lookup[32] = {
-    31, 22, 30, 21, 18, 10, 29, 2, 20, 17, 15, 13, 9, 6, 28, 1,
-    23, 19, 11, 3, 16, 14, 7, 24, 12, 4, 8, 25, 5, 26, 27, 0
-  };
-
-  x |= x >> 1;
-  x |= x >> 2;
-  x |= x >> 4;
-  x |= x >> 8;
-  x |= x >> 16;
-  return de_Bruijn_lookup[((x * 0x07c4acddU) & 0xffffffffU) >> 27];
-}
+/* This platform is so old that it lacks typeof, so _Generic is likely
+   missing or unreliable.  The C23 standard seems to allow yielding B
+   (which is always unsigned long long int), so do that.  */
+# define _GL_STDBIT_TYPEOF_CAST(a, b) (b)
 #endif
 
-/* Compute and return the number of leading zeros in X. */
-COUNT_LEADING_ZEROS_INLINE int
-count_leading_zeros (unsigned int x)
-{
-  COUNT_LEADING_ZEROS (__builtin_clz, _BitScanReverse, unsigned int);
-}
-
-/* Compute and return the number of leading zeros in X. */
-COUNT_LEADING_ZEROS_INLINE int
-count_leading_zeros_l (unsigned long int x)
-{
-  COUNT_LEADING_ZEROS (__builtin_clzl, _BitScanReverse, unsigned long int);
-}
+#define __STDC_VERSION_STDBIT_H__ 202311L
 
-/* Compute and return the number of leading zeros in X. */
-COUNT_LEADING_ZEROS_INLINE int
-count_leading_zeros_ll (unsigned long long int x)
-{
-#if (defined _MSC_VER && !defined __clang__) && !defined _M_X64
-  /* 32-bit MSVC does not have _BitScanReverse64, only _BitScanReverse.  */
-  unsigned long result;
-  if (_BitScanReverse (&result, (unsigned long) (x >> 32)))
-    return CHAR_BIT * sizeof x - 1 - 32 - result;
-  if (_BitScanReverse (&result, (unsigned long) x))
-    return CHAR_BIT * sizeof x - 1 - result;
-  return CHAR_BIT * sizeof x;
+#define __STDC_ENDIAN_BIG__ 4321
+#define __STDC_ENDIAN_LITTLE__ 1234
+#ifdef WORDS_BIGENDIAN
+# define __STDC_ENDIAN_NATIVE__ __STDC_ENDIAN_BIG__
 #else
-  COUNT_LEADING_ZEROS (__builtin_clzll, _BitScanReverse64,
-                       unsigned long long int);
+# define __STDC_ENDIAN_NATIVE__ __STDC_ENDIAN_LITTLE__
 #endif
-}
 
 #ifdef __cplusplus
-}
+extern "C" {
 #endif
 
-_GL_INLINE_HEADER_END
+#if 3 < __GNUC__ + (4 <= __GNUC_MINOR__) || 4 <= __clang_major__
+# define _GL_STDBIT_HAS_BUILTIN_CLZ true
+# define _GL_STDBIT_HAS_BUILTIN_CTZ true
+# define _GL_STDBIT_HAS_BUILTIN_POPCOUNT true
+#elif defined __has_builtin
+# if (__has_builtin (__builtin_clz) \
+      && __has_builtin (__builtin_clzl) \
+      && __has_builtin (__builtin_clzll))
+#  define _GL_STDBIT_HAS_BUILTIN_CLZ true
+# endif
+# if (__has_builtin (__builtin_ctz) \
+      && __has_builtin (__builtin_ctzl) \
+      && __has_builtin (__builtin_ctzll))
+#  define _GL_STDBIT_HAS_BUILTIN_CTZ true
+# endif
+# if (__has_builtin (__builtin_popcount) \
+      && __has_builtin (__builtin_popcountl) \
+      && __has_builtin (__builtin_popcountll))
+#  define _GL_STDBIT_HAS_BUILTIN_POPCOUNT true
+# endif
+#endif
 
-/* Taken from count-trailing-zeros.h.  */
-#include <limits.h>
-#include <stdlib.h>
+/* Count leading zeros of nonzero N.  */
+#ifdef _GL_STDBIT_HAS_BUILTIN_CLZ
+# define __gl_stdbit_clz __builtin_clz
+# define __gl_stdbit_clzl __builtin_clzl
+# define __gl_stdbit_clzll __builtin_clzll
+#elif defined _MSC_VER
+# pragma intrinsic (_BitScanReverse)
+# ifdef _M_X64
+#  pragma intrinsic (_BitScanReverse64)
+# endif
+_GL_STDBIT_INLINE int
+__gl_stdbit_clzl (unsigned long int n)
+{
+  unsigned long int r;
+  _BitScanReverse (&r, n);
+  return 8 * sizeof n - 1 - r;
+}
+_GL_STDBIT_INLINE int
+__gl_stdbit_clz (unsigned int n)
+{
+  return __gl_stdbit_clzl (n) - 8 * (sizeof 0ul - sizeof n);
+}
+_GL_STDBIT_INLINE int
+__gl_stdbit_clzll (unsigned long long int n)
+{
+# ifdef _M_X64
+  unsigned long int r;
+  _BitScanReverse64 (&r, n);
+  return 64 - 1 - r;
+# else
+  unsigned long int hi = n >> 32;
+  return __gl_stdbit_clzl (hi ? hi : n) + (hi ? 0 : 32);
+# endif
+}
 
-_GL_INLINE_HEADER_BEGIN
-#ifndef COUNT_TRAILING_ZEROS_INLINE
-# define COUNT_TRAILING_ZEROS_INLINE _GL_INLINE
-#endif
+#else /* !_MSC_VER */
 
-#ifdef __cplusplus
-extern "C" {
+extern char const __gl_stdbit_clztab[256];
+
+_GL_STDBIT_INLINE int
+__gl_stdbit_clzll (unsigned long long int n)
+{
+  static_assert (8 * sizeof n <= 1 << 7);
+  int r = 0;
+  int a6 = (0xffffffffffffffff < n) << 6; n >>= a6; r |= a6;
+  int a5 = (0x00000000ffffffff < n) << 5; n >>= a5; r |= a5;
+  int a4 = (0x000000000000ffff < n) << 4; n >>= a4; r |= a4;
+  int a3 = (0x00000000000000ff < n) << 3; n >>= a3; r |= a3;
+  return (8 * (sizeof n - 1) - r) | __gl_stdbit_clztab[n];
+}
+_GL_STDBIT_INLINE int
+__gl_stdbit_clz (unsigned int n)
+{
+  return __gl_stdbit_clzll (n) - 8 * (sizeof 0ull - sizeof 0u);
+}
+_GL_STDBIT_INLINE int
+__gl_stdbit_clzl (unsigned long int n)
+{
+  return __gl_stdbit_clzll (n) - 8 * (sizeof 0ull - sizeof 0ul);
+}
 #endif
 
-/* Assuming the GCC builtin is BUILTIN and the MSC builtin is MSC_BUILTIN,
-   expand to code that computes the number of trailing zeros of the local
-   variable 'x' of type TYPE (an unsigned integer type) and return it
-   from the current function.  */
-#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) \
-    || (__clang_major__ >= 4)
-# define COUNT_TRAILING_ZEROS(BUILTIN, MSC_BUILTIN, TYPE)               \
-  return x ? BUILTIN (x) : CHAR_BIT * sizeof x;
-#elif _MSC_VER
+#ifdef _GL_STDBIT_HAS_BUILTIN_CTZ
+# define __gl_stdbit_ctz __builtin_ctz
+# define __gl_stdbit_ctzl __builtin_ctzl
+# define __gl_stdbit_ctzll __builtin_ctzll
+#elif defined _MSC_VER
 # pragma intrinsic (_BitScanForward)
-# if defined _M_X64
+# ifdef _M_X64
 #  pragma intrinsic (_BitScanForward64)
 # endif
-# define COUNT_TRAILING_ZEROS(BUILTIN, MSC_BUILTIN, TYPE)               \
-    do                                                                  \
-      {                                                                 \
-        unsigned long result;                                           \
-        return MSC_BUILTIN (&result, x) ? result : CHAR_BIT * sizeof x; \
-      }                                                                 \
-    while (0)
-#else
-# define COUNT_TRAILING_ZEROS(BUILTIN, MSC_BUILTIN, TYPE)               \
-    do                                                                  \
-      {                                                                 \
-        int count = 0;                                                  \
-        if (! x)                                                        \
-          return CHAR_BIT * sizeof x;                                   \
-        for (count = 0;                                                 \
-             (count < CHAR_BIT * sizeof x - 32                          \
-              && ! (x & 0xffffffffU));                                  \
-             count += 32)                                               \
-          x = x >> 31 >> 1;                                             \
-        return count + count_trailing_zeros_32 (x);                     \
-      }                                                                 \
-    while (0)
-
-/* Compute and return the number of trailing zeros in the least
-   significant 32 bits of X.  One of these bits must be nonzero.  */
-COUNT_TRAILING_ZEROS_INLINE int
-count_trailing_zeros_32 (unsigned int x)
-{
-  /* <https://github.com/gibsjose/BitHacks>
-     <https://www.fit.vutbr.cz/~ibarina/pub/bithacks.pdf> */
-  static const char de_Bruijn_lookup[32] = {
-    0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
-    31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
-  };
-  return de_Bruijn_lookup[(((x & -x) * 0x077cb531U) & 0xffffffffU) >> 27];
+_GL_STDBIT_INLINE int
+__gl_stdbit_ctzl (unsigned long int n)
+{
+  unsigned long int r;
+  _BitScanForward (&r, n);
+  return r;
 }
-#endif
-
-/* Compute and return the number of trailing zeros in X. */
-COUNT_TRAILING_ZEROS_INLINE int
-count_trailing_zeros (unsigned int x)
+_GL_STDBIT_INLINE int
+__gl_stdbit_ctz (unsigned int n)
 {
-  COUNT_TRAILING_ZEROS (__builtin_ctz, _BitScanForward, unsigned int);
+  return __gl_stdbit_ctzl (n);
 }
-
-/* Compute and return the number of trailing zeros in X. */
-COUNT_TRAILING_ZEROS_INLINE int
-count_trailing_zeros_l (unsigned long int x)
+_GL_STDBIT_INLINE int
+__gl_stdbit_ctzll (unsigned long long int n)
 {
-  COUNT_TRAILING_ZEROS (__builtin_ctzl, _BitScanForward, unsigned long int);
+# ifdef _M_X64
+  unsigned long int r;
+  _BitScanForward64 (&r, n);
+  return r;
+# else
+  unsigned int lo = n;
+  return __gl_stdbit_ctzl (lo ? lo : n >> 32) + (lo ? 0 : 32);
+# endif
+_GL_STDBIT_INLINE int
 }
 
-/* Compute and return the number of trailing zeros in X. */
-COUNT_TRAILING_ZEROS_INLINE int
-count_trailing_zeros_ll (unsigned long long int x)
+#else /* !_MSC_VER */
+
+_GL_STDBIT_INLINE int
+__gl_stdbit_ctz (unsigned int n)
 {
-#if (defined _MSC_VER && !defined __clang__) && !defined _M_X64
-  /* 32-bit MSVC does not have _BitScanForward64, only _BitScanForward.  */
-  unsigned long result;
-  if (_BitScanForward (&result, (unsigned long) x))
-    return result;
-  if (_BitScanForward (&result, (unsigned long) (x >> 32)))
-    return result + 32;
-  return CHAR_BIT * sizeof x;
-#else
-  COUNT_TRAILING_ZEROS (__builtin_ctzll, _BitScanForward64,
-                        unsigned long long int);
-#endif
+  return 8 * sizeof n - 1 - __gl_stdbit_clz (n & -n);
 }
-
-#ifdef __cplusplus
+_GL_STDBIT_INLINE int
+__gl_stdbit_ctzl (unsigned long int n)
+{
+  return 8 * sizeof n - 1 - __gl_stdbit_clzl (n & -n);
+}
+_GL_STDBIT_INLINE int
+__gl_stdbit_ctzll (unsigned long long int n)
+{
+  return 8 * sizeof n - 1 - __gl_stdbit_clzll (n & -n);
 }
 #endif
 
-_GL_INLINE_HEADER_END
-
-/* Taken from count-one-bits.h.  */
-#include <limits.h>
-#include <stdlib.h>
-
-_GL_INLINE_HEADER_BEGIN
-#ifndef COUNT_ONE_BITS_INLINE
-# define COUNT_ONE_BITS_INLINE _GL_INLINE
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* Assuming the GCC builtin is GCC_BUILTIN and the MSC builtin is MSC_BUILTIN,
-   expand to code that computes the number of 1-bits of the local
-   variable 'x' of type TYPE (an unsigned integer type) and return it
-   from the current function.  */
-#if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)) \
-    || (__clang_major__ >= 4)
-# define COUNT_ONE_BITS(GCC_BUILTIN, MSC_BUILTIN, TYPE) \
-    return GCC_BUILTIN (x)
+#ifdef _GL_STDBIT_HAS_BUILTIN_POPCOUNT
+# define __gl_stdbit_popcount __builtin_popcount
+# define __gl_stdbit_popcountl __builtin_popcountl
+# define __gl_stdbit_popcountll __builtin_popcountll
 #else
-
-/* Compute and return the number of 1-bits set in the least
-   significant 32 bits of X. */
-COUNT_ONE_BITS_INLINE int
-count_one_bits_32 (unsigned int x)
-{
-  x = ((x & 0xaaaaaaaaU) >> 1) + (x & 0x55555555U);
-  x = ((x & 0xccccccccU) >> 2) + (x & 0x33333333U);
-  x = (x >> 16) + (x & 0xffff);
-  x = ((x & 0xf0f0) >> 4) + (x & 0x0f0f);
-  return (x >> 8) + (x & 0x00ff);
-}
-
-/* Expand to code that computes the number of 1-bits of the local
-   variable 'x' of type TYPE (an unsigned integer type) and return it
-   from the current function.  */
-# define COUNT_ONE_BITS_GENERIC(TYPE)                                   \
-    do                                                                  \
-      {                                                                 \
-        int count = 0;                                                  \
-        int bits;                                                       \
-        for (bits = 0; bits < sizeof (TYPE) * CHAR_BIT; bits += 32)     \
-          {                                                             \
-            count += count_one_bits_32 (x);                             \
-            x = x >> 31 >> 1;                                           \
-          }                                                             \
-        return count;                                                   \
-      }                                                                 \
-    while (0)
-
-# if 1500 <= _MSC_VER && (defined _M_IX86 || defined _M_X64)
-
-/* While gcc falls back to its own generic code if the machine
-   on which it's running doesn't support popcount, with Microsoft's
-   compiler we need to detect and fallback ourselves.  */
-
-#  if 0
-#   include <intrin.h>
-#  else
-    /* Don't pollute the namespace with too many MSVC intrinsics.  */
+/* Count the number of 1 bits in N.  */
+_GL_STDBIT_INLINE int
+__gl_stdbit_popcount255 (unsigned long long int n)
+{
+  unsigned long long int
+    max = -1ull,
+    x555555 = max / (1 << 1 | 1),      /* 0x555555... */
+    x333333 = max / (1 << 2 | 1),      /* 0x333333... */
+    x0f0f0f = max / (1 << 4 | 1),      /* 0x0f0f0f... */
+    x010101 = max / ((1 << 8) - 1);    /* 0x010101... */
+  n -= (n >> 1) & x555555;
+  n = (n & x333333) + ((n >> 2) & x333333);
+  n = (n + (n >> 4)) & x0f0f0f;
+
+  /* The multiplication means the popcount should fit in the leading 8
+     bits of the product, so N should be narrower than 256 bits.  */
+  static_assert (8 * sizeof n < 1 << 8);
+  return n * x010101 >> 8 * (sizeof n - 1);
+}
+
+# ifdef _MSC_VER
+#  if 1500 <= _MSC_VER && (defined _M_IX86 || defined _M_X64)
 #   pragma intrinsic (__cpuid)
 #   pragma intrinsic (__popcnt)
-#   if defined _M_X64
+#   ifdef _M_X64
 #    pragma intrinsic (__popcnt64)
-#   endif
-#  endif
-
-#  if !defined _M_X64
-static inline __popcnt64 (unsigned long long x)
+#   else
+_GL_STDBIT_INLINE int
+__popcnt64 (unsigned long long int n)
 {
-  return __popcnt ((unsigned int) (x >> 32)) + __popcnt ((unsigned int) x);
+  return __popcnt (n >> 32) + __popcnt (n);
 }
+#   endif
 #  endif
 
-/* Return nonzero if popcount is supported.  */
-
-/* 1 if supported, 0 if not supported, -1 if unknown.  */
-extern int popcount_support;
+/* 1 if supported, -1 if not, 0 if unknown.  */
+extern signed char __gl_stdint_popcount_support;
 
-COUNT_ONE_BITS_INLINE int
-popcount_supported (void)
+_GL_STDBIT_INLINE bool
+__gl_stdbit_popcount_supported (void)
 {
-  if (popcount_support < 0)
+  if (!__gl_stdbit_popcount_support)
     {
       /* Do as described in
-         
<https://docs.microsoft.com/en-us/cpp/intrinsics/popcnt16-popcnt-popcnt64> */
+         
<https://docs.microsoft.com/en-us/cpp/intrinsics/popcnt16-popcnt-popcnt64>
+         Although Microsoft started requiring POPCNT in MS-Windows 11 24H2,
+         we'll be more cautious.  */
       int cpu_info[4];
       __cpuid (cpu_info, 1);
-      popcount_support = (cpu_info[2] >> 23) & 1;
+      __gl_stdbit_popcount_support = cpu_info[2] & 1 << 23 ? 1 : -1;
     }
-  return popcount_support;
+  return 0 < __gl_stdbit_popcount_support;
 }
-
-#  define COUNT_ONE_BITS(GCC_BUILTIN, MSC_BUILTIN, TYPE) \
-     do                                                  \
-       {                                                 \
-         if (popcount_supported ())                      \
-           return MSC_BUILTIN (x);                       \
-         else                                            \
-           COUNT_ONE_BITS_GENERIC (TYPE);                \
-       }                                                 \
-     while (0)
-
-# else
-
-#  define COUNT_ONE_BITS(GCC_BUILTIN, MSC_BUILTIN, TYPE) \
-     COUNT_ONE_BITS_GENERIC (TYPE)
-
-# endif
-#endif
-
-/* Compute and return the number of 1-bits set in X. */
-COUNT_ONE_BITS_INLINE int
-count_one_bits (unsigned int x)
+_GL_STDBIT_INLINE int
+__gl_stdbit_popcount (unsigned int n)
 {
-  COUNT_ONE_BITS (__builtin_popcount, __popcnt, unsigned int);
+  return (__gl_stdbit_popcount_supported ()
+          ? __popcnt (n)
+          : __gl_stdbit_popcount255 (n));
 }
-
-/* Compute and return the number of 1-bits set in X. */
-COUNT_ONE_BITS_INLINE int
-count_one_bits_l (unsigned long int x)
+_GL_STDBIT_INLINE int
+__gl_stdbit_popcountl (unsigned long int n)
 {
-  COUNT_ONE_BITS (__builtin_popcountl, __popcnt, unsigned long int);
+  return (__gl_stdbit_popcount_supported ()
+          ? __popcnt (n)
+          : __gl_stdbit_popcount255 (n));
 }
-
-/* Compute and return the number of 1-bits set in X. */
-COUNT_ONE_BITS_INLINE int
-count_one_bits_ll (unsigned long long int x)
+_GL_STDBIT_INLINE int
+__gl_stdbit_popcountll (unsigned long long int n)
 {
-  COUNT_ONE_BITS (__builtin_popcountll, __popcnt64, unsigned long long int);
+  return (__gl_stdbit_popcount_supported ()
+          ? __popcnt64 (n)
+          : __gl_stdbit_popcount255 (n));
 }
-
-#ifdef __cplusplus
-}
-#endif
-
-_GL_INLINE_HEADER_END
-
-
-/* An expression, preferably with the type of A, that has the value of B.  */
-#if ((defined __GNUC__ && 2 <= __GNUC__) \
-     || (defined __clang_major__ && 4 <= __clang_major__) \
-     || (defined __IBMC__ && 1210 <= __IBMC__ && defined __IBM__TYPEOF__) \
-     || (defined __SUNPRO_C && 0x5110 <= __SUNPRO_C && !__STDC__))
-# define _GL_STDBIT_TYPEOF_CAST(a, b) ((__typeof__ (a)) (b))
-#elif 202311 <= __STDC_VERSION__
-# define _GL_STDBIT_TYPEOF_CAST(a, b) ((typeof (a)) (b))
-#else
-/* This platform is so old that it lacks typeof, so _Generic is likely
-   missing or unreliable.  The C23 standard seems to allow yielding B
-   (which is always unsigned long long int), so do that.  */
-# define _GL_STDBIT_TYPEOF_CAST(a, b) (b)
-#endif
-
-#define __STDC_VERSION_STDBIT_H__ 202311L
-
-#define __STDC_ENDIAN_BIG__ 4321
-#define __STDC_ENDIAN_LITTLE__ 1234
-#ifdef WORDS_BIGENDIAN
-# define __STDC_ENDIAN_NATIVE__ __STDC_ENDIAN_BIG__
-#else
-# define __STDC_ENDIAN_NATIVE__ __STDC_ENDIAN_LITTLE__
-#endif
-
-_GL_INLINE_HEADER_BEGIN
-#ifndef _GL_STDBIT_INLINE
-# define _GL_STDBIT_INLINE _GL_INLINE
-#endif
-
-#ifdef __cplusplus
-extern "C" {
+# else /* !_MSC_VER */
+#  define __gl_stdbit_popcount __gl_stdbit_popcount255
+#  define __gl_stdbit_popcountl __gl_stdbit_popcount255
+#  define __gl_stdbit_popcountll __gl_stdbit_popcount255
+# endif
 #endif
 
 
 _GL_STDBIT_INLINE unsigned int
 stdc_leading_zeros_ui (unsigned int n)
 {
-  return count_leading_zeros (n);
+  return n ? __gl_stdbit_clz (n) : 8 * sizeof n;
 }
 
 _GL_STDBIT_INLINE unsigned int
@@ -436,13 +313,13 @@ stdc_leading_zeros_us (unsigned short int n)
 _GL_STDBIT_INLINE unsigned int
 stdc_leading_zeros_ul (unsigned long int n)
 {
-  return count_leading_zeros_l (n);
+  return n ? __gl_stdbit_clzl (n) : 8 * sizeof n;
 }
 
 _GL_STDBIT_INLINE unsigned int
 stdc_leading_zeros_ull (unsigned long long int n)
 {
-  return count_leading_zeros_ll (n);
+  return n ? __gl_stdbit_clzll (n) : 8 * sizeof n;
 }
 
 #define stdc_leading_zeros(n) \
@@ -494,7 +371,7 @@ stdc_leading_ones_ull (unsigned long long int n)
 _GL_STDBIT_INLINE unsigned int
 stdc_trailing_zeros_ui (unsigned int n)
 {
-  return count_trailing_zeros (n);
+  return n ? __gl_stdbit_ctz (n) : 8 * sizeof n;
 }
 
 _GL_STDBIT_INLINE unsigned int
@@ -512,13 +389,13 @@ stdc_trailing_zeros_us (unsigned short int n)
 _GL_STDBIT_INLINE unsigned int
 stdc_trailing_zeros_ul (unsigned long int n)
 {
-  return count_trailing_zeros_l (n);
+  return n ? __gl_stdbit_ctzl (n) : 8 * sizeof n;
 }
 
 _GL_STDBIT_INLINE unsigned int
 stdc_trailing_zeros_ull (unsigned long long int n)
 {
-  return count_trailing_zeros_ll (n);
+  return n ? __gl_stdbit_ctzll (n) : 8 * sizeof n;
 }
 
 #define stdc_trailing_zeros(n) \
@@ -742,7 +619,7 @@ stdc_first_trailing_one_ull (unsigned long long int n)
 _GL_STDBIT_INLINE unsigned int
 stdc_count_ones_ui (unsigned int n)
 {
-  return count_one_bits (n);
+  return __gl_stdbit_popcount (n);
 }
 
 _GL_STDBIT_INLINE unsigned int
@@ -760,13 +637,13 @@ stdc_count_ones_us (unsigned short int n)
 _GL_STDBIT_INLINE unsigned int
 stdc_count_ones_ul (unsigned long int n)
 {
-  return count_one_bits_l (n);
+  return __gl_stdbit_popcountl (n);
 }
 
 _GL_STDBIT_INLINE unsigned int
 stdc_count_ones_ull (unsigned long long int n)
 {
-  return count_one_bits_ll (n);
+  return __gl_stdbit_popcountll (n);
 }
 
 #define stdc_count_ones(n) \
diff --git a/modules/stdbit b/modules/stdbit
index f70386566b..e8144ab71a 100644
--- a/modules/stdbit
+++ b/modules/stdbit
@@ -7,6 +7,7 @@ lib/stdbit.in.h
 m4/stdbit_h.m4
 
 Depends-on:
+assert-h                [$GL_GENERATE_STDBIT_H]
 stdbool                 [$GL_GENERATE_STDBIT_H]
 
 configure.ac:
-- 
2.44.0

[PATCH 4/4] stdbit: clean up namespace and simplify

Reply via email to