Fix namespace pollution in substitute stdbit.h. Clean up and simplify some of the non-GCC code, by preferring inline functions to macros and substituting something more straightforward than a de Bruijn hash (possibly faster?). The non-GCC non-C23 substitutes should all compile to branch-free code, if the compiler is good. * lib/stdbit.c (COUNT_LEADING_ZEROS_INLINE) (COUNT_TRAILING_ZEROS_INLINE, COUNT_ONE_BITS_INLINE): Remove. (__gl_stdbit_clztab) [!_GL_STDBIT_HAS_BUILTIN_CLZ && !_MSC_VER]: New constant array. (__gl_stdbit_popcount_support): Adjust to stdbit.in.h changes. * lib/stdbit.in.h: Do not include <limits.h> or <stdlib.h>. Check that bytes are 8 bits. (COUNT_LEADING_ZEROS_INLINE, COUNT_TRAILING_ZEROS_INLINE) (COUNT_ONE_BITS_INLINE, COUNT_LEADING_ZEROS) (count_leading_zeros_32, count_leading_zeros) (count_leading_zeros_l, count_leading_zeros_ll) (COUNT_TRAILING_ZEROS, count_trailing_zeros_32) (count_trailing_zeros, count_trailing_zeros_l) (count_trailing_zeros_ll, COUNT_ONE_BITS, count_one_bits_32) (COUNT_ONE_BITS_GENERIC, count_one_bits, count_one_bits_l) (count_one_bits_ll): Remove, replacing all uses with ... (_GL_STDBIT_HAS_BUILTIN_CLZ) (_GL_STDBIT_HAS_BUILTIN_CTZ, _GL_STDBIT_HAS_BUILTIN_POPCOUNT) (__gl_stdbit_clz, __gl_stdbit_clzl, __gl_stdbit_clzll) (__gl_stdbit_ctz, __gl_stdbit_ctzl, __gl_stdbit_ctzll) (__gl_stdbit_popcount, __gl_stdbit_popcountl, __gl_stdbit_popcountll) (__gl_stdbit_popcount255): ... these new functions and macros. (__popcnt64): Omit unnecessary casts. (__gl_stdbit_popcount_support): Rename from popcount_support and make it a signed char since that’s all we need. (__gl_stdbit_popcount_supported): Rename from popcount_supported. All uses changed. * modules/stdbit (Depends-on): Add assert-h, for static_assert. --- ChangeLog | 36 ++++ lib/stdbit.c | 50 ++++- lib/stdbit.in.h | 553 +++++++++++++++++++----------------------------- modules/stdbit | 1 + 4 files changed, 298 insertions(+), 342 deletions(-)
diff --git a/ChangeLog b/ChangeLog index b0e7efd02c..b4319fc47b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,41 @@ 2024-05-11 Paul Eggert <egg...@cs.ucla.edu> + stdbit: clean up namespace and simplify + Fix namespace pollution in substitute stdbit.h. + Clean up and simplify some of the non-GCC code, by preferring + inline functions to macros and substituting something + more straightforward than a de Bruijn hash (possibly faster?). + The non-GCC non-C23 substitutes should all compile to + branch-free code, if the compiler is good. + * lib/stdbit.c (COUNT_LEADING_ZEROS_INLINE) + (COUNT_TRAILING_ZEROS_INLINE, COUNT_ONE_BITS_INLINE): Remove. + (__gl_stdbit_clztab) [!_GL_STDBIT_HAS_BUILTIN_CLZ && !_MSC_VER]: + New constant array. + (__gl_stdbit_popcount_support): Adjust to stdbit.in.h changes. + * lib/stdbit.in.h: Do not include <limits.h> or <stdlib.h>. + Check that bytes are 8 bits. + (COUNT_LEADING_ZEROS_INLINE, COUNT_TRAILING_ZEROS_INLINE) + (COUNT_ONE_BITS_INLINE, COUNT_LEADING_ZEROS) + (count_leading_zeros_32, count_leading_zeros) + (count_leading_zeros_l, count_leading_zeros_ll) + (COUNT_TRAILING_ZEROS, count_trailing_zeros_32) + (count_trailing_zeros, count_trailing_zeros_l) + (count_trailing_zeros_ll, COUNT_ONE_BITS, count_one_bits_32) + (COUNT_ONE_BITS_GENERIC, count_one_bits, count_one_bits_l) + (count_one_bits_ll): Remove, replacing all uses with ... + (_GL_STDBIT_HAS_BUILTIN_CLZ) + (_GL_STDBIT_HAS_BUILTIN_CTZ, _GL_STDBIT_HAS_BUILTIN_POPCOUNT) + (__gl_stdbit_clz, __gl_stdbit_clzl, __gl_stdbit_clzll) + (__gl_stdbit_ctz, __gl_stdbit_ctzl, __gl_stdbit_ctzll) + (__gl_stdbit_popcount, __gl_stdbit_popcountl, __gl_stdbit_popcountll) + (__gl_stdbit_popcount255): ... these new functions and macros. + (__popcnt64): Omit unnecessary casts. + (__gl_stdbit_popcount_support): Rename from popcount_support + and make it a signed char since that’s all we need. + (__gl_stdbit_popcount_supported): Rename from popcount_supported. + All uses changed. + * modules/stdbit (Depends-on): Add assert-h, for static_assert. + stdbit: remove most module dependence Remove dependence of stdbit on the modules count-leading-zeros, count-trailing-zeros, and count-one-bits. stdbit is part of C23 diff --git a/lib/stdbit.c b/lib/stdbit.c index 2a5626a902..f2a51b10f7 100644 --- a/lib/stdbit.c +++ b/lib/stdbit.c @@ -1,9 +1,51 @@ +/* Support C23 bit and byte utilities on non-C23 platforms. + + Copyright 2024 Free Software Foundation, Inc. + + This file is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + This file is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. */ + +/* Written by Paul Eggert. */ + #include <config.h> + #define _GL_STDBIT_INLINE _GL_EXTERN_INLINE -#define COUNT_LEADING_ZEROS_INLINE _GL_EXTERN_INLINE -#define COUNT_TRAILING_ZEROS_INLINE _GL_EXTERN_INLINE -#define COUNT_ONE_BITS_INLINE _GL_EXTERN_INLINE #include <stdbit.h> + +#if !defined _GL_STDBIT_HAS_BUILTIN_CLZ && !_MSC_VER +/* __gl_stdbit_clztab[B] is the number of leading zeros in + the 8-bit byte with value B. */ +char const __gl_stdbit_clztab[256] = + { + 8, + 7, + 6, 6, + 5, 5, 5, 5, + 4, 4, 4, 4, 4, 4, 4, 4, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + + /* The rest is zero. */ + }; +#endif + #if 1500 <= _MSC_VER && (defined _M_IX86 || defined _M_X64) -int popcount_support = -1; +signed char __gl_stdbit_popcount_support; #endif diff --git a/lib/stdbit.in.h b/lib/stdbit.in.h index 63be4737a8..ed68bd6513 100644 --- a/lib/stdbit.in.h +++ b/lib/stdbit.in.h @@ -25,400 +25,277 @@ #error "Please include config.h first." #endif -/* Taken from count-leading-zeros.h. */ -#include <limits.h> -#include <stdlib.h> - _GL_INLINE_HEADER_BEGIN -#ifndef COUNT_LEADING_ZEROS_INLINE -# define COUNT_LEADING_ZEROS_INLINE _GL_INLINE +#ifndef _GL_STDBIT_INLINE +# define _GL_STDBIT_INLINE _GL_INLINE #endif #ifdef __cplusplus extern "C" { #endif -/* Assuming the GCC builtin is BUILTIN and the MSC builtin is MSC_BUILTIN, - expand to code that computes the number of leading zeros of the local - variable 'x' of type TYPE (an unsigned integer type) and return it - from the current function. */ -#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) \ - || (__clang_major__ >= 4) -# define COUNT_LEADING_ZEROS(BUILTIN, MSC_BUILTIN, TYPE) \ - return x ? BUILTIN (x) : CHAR_BIT * sizeof x; -#elif _MSC_VER -# pragma intrinsic (_BitScanReverse) -# if defined _M_X64 -# pragma intrinsic (_BitScanReverse64) -# endif -# define COUNT_LEADING_ZEROS(BUILTIN, MSC_BUILTIN, TYPE) \ - do \ - { \ - unsigned long result; \ - if (MSC_BUILTIN (&result, x)) \ - return CHAR_BIT * sizeof x - 1 - result; \ - return CHAR_BIT * sizeof x; \ - } \ - while (0) +/* Bytes must be 8 bits, as POSIX requires. This implementation uses + 8 instead of CHAR_BIT to avoid namespace pollution. */ +static_assert ((unsigned char) -1 == (1 << 8) - 1); + +/* An expression, preferably with the type of A, that has the value of B. */ +#if ((defined __GNUC__ && 2 <= __GNUC__) \ + || (defined __clang_major__ && 4 <= __clang_major__) \ + || (defined __IBMC__ && 1210 <= __IBMC__ && defined __IBM__TYPEOF__) \ + || (defined __SUNPRO_C && 0x5110 <= __SUNPRO_C && !__STDC__)) +# define _GL_STDBIT_TYPEOF_CAST(a, b) ((__typeof__ (a)) (b)) +#elif 202311 <= __STDC_VERSION__ +# define _GL_STDBIT_TYPEOF_CAST(a, b) ((typeof (a)) (b)) #else -# define COUNT_LEADING_ZEROS(BUILTIN, MSC_BUILTIN, TYPE) \ - do \ - { \ - int count; \ - unsigned int leading_32; \ - if (! x) \ - return CHAR_BIT * sizeof x; \ - for (count = 0; \ - (leading_32 = ((x >> (sizeof (TYPE) * CHAR_BIT - 32)) \ - & 0xffffffffU), \ - count < CHAR_BIT * sizeof x - 32 && !leading_32); \ - count += 32) \ - x = x << 31 << 1; \ - return count + count_leading_zeros_32 (leading_32); \ - } \ - while (0) - -/* Compute and return the number of leading zeros in X, - where 0 < X < 2**32. */ -COUNT_LEADING_ZEROS_INLINE int -count_leading_zeros_32 (unsigned int x) -{ - /* <https://github.com/gibsjose/BitHacks> - <https://www.fit.vutbr.cz/~ibarina/pub/bithacks.pdf> */ - static const char de_Bruijn_lookup[32] = { - 31, 22, 30, 21, 18, 10, 29, 2, 20, 17, 15, 13, 9, 6, 28, 1, - 23, 19, 11, 3, 16, 14, 7, 24, 12, 4, 8, 25, 5, 26, 27, 0 - }; - - x |= x >> 1; - x |= x >> 2; - x |= x >> 4; - x |= x >> 8; - x |= x >> 16; - return de_Bruijn_lookup[((x * 0x07c4acddU) & 0xffffffffU) >> 27]; -} +/* This platform is so old that it lacks typeof, so _Generic is likely + missing or unreliable. The C23 standard seems to allow yielding B + (which is always unsigned long long int), so do that. */ +# define _GL_STDBIT_TYPEOF_CAST(a, b) (b) #endif -/* Compute and return the number of leading zeros in X. */ -COUNT_LEADING_ZEROS_INLINE int -count_leading_zeros (unsigned int x) -{ - COUNT_LEADING_ZEROS (__builtin_clz, _BitScanReverse, unsigned int); -} - -/* Compute and return the number of leading zeros in X. */ -COUNT_LEADING_ZEROS_INLINE int -count_leading_zeros_l (unsigned long int x) -{ - COUNT_LEADING_ZEROS (__builtin_clzl, _BitScanReverse, unsigned long int); -} +#define __STDC_VERSION_STDBIT_H__ 202311L -/* Compute and return the number of leading zeros in X. */ -COUNT_LEADING_ZEROS_INLINE int -count_leading_zeros_ll (unsigned long long int x) -{ -#if (defined _MSC_VER && !defined __clang__) && !defined _M_X64 - /* 32-bit MSVC does not have _BitScanReverse64, only _BitScanReverse. */ - unsigned long result; - if (_BitScanReverse (&result, (unsigned long) (x >> 32))) - return CHAR_BIT * sizeof x - 1 - 32 - result; - if (_BitScanReverse (&result, (unsigned long) x)) - return CHAR_BIT * sizeof x - 1 - result; - return CHAR_BIT * sizeof x; +#define __STDC_ENDIAN_BIG__ 4321 +#define __STDC_ENDIAN_LITTLE__ 1234 +#ifdef WORDS_BIGENDIAN +# define __STDC_ENDIAN_NATIVE__ __STDC_ENDIAN_BIG__ #else - COUNT_LEADING_ZEROS (__builtin_clzll, _BitScanReverse64, - unsigned long long int); +# define __STDC_ENDIAN_NATIVE__ __STDC_ENDIAN_LITTLE__ #endif -} #ifdef __cplusplus -} +extern "C" { #endif -_GL_INLINE_HEADER_END +#if 3 < __GNUC__ + (4 <= __GNUC_MINOR__) || 4 <= __clang_major__ +# define _GL_STDBIT_HAS_BUILTIN_CLZ true +# define _GL_STDBIT_HAS_BUILTIN_CTZ true +# define _GL_STDBIT_HAS_BUILTIN_POPCOUNT true +#elif defined __has_builtin +# if (__has_builtin (__builtin_clz) \ + && __has_builtin (__builtin_clzl) \ + && __has_builtin (__builtin_clzll)) +# define _GL_STDBIT_HAS_BUILTIN_CLZ true +# endif +# if (__has_builtin (__builtin_ctz) \ + && __has_builtin (__builtin_ctzl) \ + && __has_builtin (__builtin_ctzll)) +# define _GL_STDBIT_HAS_BUILTIN_CTZ true +# endif +# if (__has_builtin (__builtin_popcount) \ + && __has_builtin (__builtin_popcountl) \ + && __has_builtin (__builtin_popcountll)) +# define _GL_STDBIT_HAS_BUILTIN_POPCOUNT true +# endif +#endif -/* Taken from count-trailing-zeros.h. */ -#include <limits.h> -#include <stdlib.h> +/* Count leading zeros of nonzero N. */ +#ifdef _GL_STDBIT_HAS_BUILTIN_CLZ +# define __gl_stdbit_clz __builtin_clz +# define __gl_stdbit_clzl __builtin_clzl +# define __gl_stdbit_clzll __builtin_clzll +#elif defined _MSC_VER +# pragma intrinsic (_BitScanReverse) +# ifdef _M_X64 +# pragma intrinsic (_BitScanReverse64) +# endif +_GL_STDBIT_INLINE int +__gl_stdbit_clzl (unsigned long int n) +{ + unsigned long int r; + _BitScanReverse (&r, n); + return 8 * sizeof n - 1 - r; +} +_GL_STDBIT_INLINE int +__gl_stdbit_clz (unsigned int n) +{ + return __gl_stdbit_clzl (n) - 8 * (sizeof 0ul - sizeof n); +} +_GL_STDBIT_INLINE int +__gl_stdbit_clzll (unsigned long long int n) +{ +# ifdef _M_X64 + unsigned long int r; + _BitScanReverse64 (&r, n); + return 64 - 1 - r; +# else + unsigned long int hi = n >> 32; + return __gl_stdbit_clzl (hi ? hi : n) + (hi ? 0 : 32); +# endif +} -_GL_INLINE_HEADER_BEGIN -#ifndef COUNT_TRAILING_ZEROS_INLINE -# define COUNT_TRAILING_ZEROS_INLINE _GL_INLINE -#endif +#else /* !_MSC_VER */ -#ifdef __cplusplus -extern "C" { +extern char const __gl_stdbit_clztab[256]; + +_GL_STDBIT_INLINE int +__gl_stdbit_clzll (unsigned long long int n) +{ + static_assert (8 * sizeof n <= 1 << 7); + int r = 0; + int a6 = (0xffffffffffffffff < n) << 6; n >>= a6; r |= a6; + int a5 = (0x00000000ffffffff < n) << 5; n >>= a5; r |= a5; + int a4 = (0x000000000000ffff < n) << 4; n >>= a4; r |= a4; + int a3 = (0x00000000000000ff < n) << 3; n >>= a3; r |= a3; + return (8 * (sizeof n - 1) - r) | __gl_stdbit_clztab[n]; +} +_GL_STDBIT_INLINE int +__gl_stdbit_clz (unsigned int n) +{ + return __gl_stdbit_clzll (n) - 8 * (sizeof 0ull - sizeof 0u); +} +_GL_STDBIT_INLINE int +__gl_stdbit_clzl (unsigned long int n) +{ + return __gl_stdbit_clzll (n) - 8 * (sizeof 0ull - sizeof 0ul); +} #endif -/* Assuming the GCC builtin is BUILTIN and the MSC builtin is MSC_BUILTIN, - expand to code that computes the number of trailing zeros of the local - variable 'x' of type TYPE (an unsigned integer type) and return it - from the current function. */ -#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) \ - || (__clang_major__ >= 4) -# define COUNT_TRAILING_ZEROS(BUILTIN, MSC_BUILTIN, TYPE) \ - return x ? BUILTIN (x) : CHAR_BIT * sizeof x; -#elif _MSC_VER +#ifdef _GL_STDBIT_HAS_BUILTIN_CTZ +# define __gl_stdbit_ctz __builtin_ctz +# define __gl_stdbit_ctzl __builtin_ctzl +# define __gl_stdbit_ctzll __builtin_ctzll +#elif defined _MSC_VER # pragma intrinsic (_BitScanForward) -# if defined _M_X64 +# ifdef _M_X64 # pragma intrinsic (_BitScanForward64) # endif -# define COUNT_TRAILING_ZEROS(BUILTIN, MSC_BUILTIN, TYPE) \ - do \ - { \ - unsigned long result; \ - return MSC_BUILTIN (&result, x) ? result : CHAR_BIT * sizeof x; \ - } \ - while (0) -#else -# define COUNT_TRAILING_ZEROS(BUILTIN, MSC_BUILTIN, TYPE) \ - do \ - { \ - int count = 0; \ - if (! x) \ - return CHAR_BIT * sizeof x; \ - for (count = 0; \ - (count < CHAR_BIT * sizeof x - 32 \ - && ! (x & 0xffffffffU)); \ - count += 32) \ - x = x >> 31 >> 1; \ - return count + count_trailing_zeros_32 (x); \ - } \ - while (0) - -/* Compute and return the number of trailing zeros in the least - significant 32 bits of X. One of these bits must be nonzero. */ -COUNT_TRAILING_ZEROS_INLINE int -count_trailing_zeros_32 (unsigned int x) -{ - /* <https://github.com/gibsjose/BitHacks> - <https://www.fit.vutbr.cz/~ibarina/pub/bithacks.pdf> */ - static const char de_Bruijn_lookup[32] = { - 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, - 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 - }; - return de_Bruijn_lookup[(((x & -x) * 0x077cb531U) & 0xffffffffU) >> 27]; +_GL_STDBIT_INLINE int +__gl_stdbit_ctzl (unsigned long int n) +{ + unsigned long int r; + _BitScanForward (&r, n); + return r; } -#endif - -/* Compute and return the number of trailing zeros in X. */ -COUNT_TRAILING_ZEROS_INLINE int -count_trailing_zeros (unsigned int x) +_GL_STDBIT_INLINE int +__gl_stdbit_ctz (unsigned int n) { - COUNT_TRAILING_ZEROS (__builtin_ctz, _BitScanForward, unsigned int); + return __gl_stdbit_ctzl (n); } - -/* Compute and return the number of trailing zeros in X. */ -COUNT_TRAILING_ZEROS_INLINE int -count_trailing_zeros_l (unsigned long int x) +_GL_STDBIT_INLINE int +__gl_stdbit_ctzll (unsigned long long int n) { - COUNT_TRAILING_ZEROS (__builtin_ctzl, _BitScanForward, unsigned long int); +# ifdef _M_X64 + unsigned long int r; + _BitScanForward64 (&r, n); + return r; +# else + unsigned int lo = n; + return __gl_stdbit_ctzl (lo ? lo : n >> 32) + (lo ? 0 : 32); +# endif +_GL_STDBIT_INLINE int } -/* Compute and return the number of trailing zeros in X. */ -COUNT_TRAILING_ZEROS_INLINE int -count_trailing_zeros_ll (unsigned long long int x) +#else /* !_MSC_VER */ + +_GL_STDBIT_INLINE int +__gl_stdbit_ctz (unsigned int n) { -#if (defined _MSC_VER && !defined __clang__) && !defined _M_X64 - /* 32-bit MSVC does not have _BitScanForward64, only _BitScanForward. */ - unsigned long result; - if (_BitScanForward (&result, (unsigned long) x)) - return result; - if (_BitScanForward (&result, (unsigned long) (x >> 32))) - return result + 32; - return CHAR_BIT * sizeof x; -#else - COUNT_TRAILING_ZEROS (__builtin_ctzll, _BitScanForward64, - unsigned long long int); -#endif + return 8 * sizeof n - 1 - __gl_stdbit_clz (n & -n); } - -#ifdef __cplusplus +_GL_STDBIT_INLINE int +__gl_stdbit_ctzl (unsigned long int n) +{ + return 8 * sizeof n - 1 - __gl_stdbit_clzl (n & -n); +} +_GL_STDBIT_INLINE int +__gl_stdbit_ctzll (unsigned long long int n) +{ + return 8 * sizeof n - 1 - __gl_stdbit_clzll (n & -n); } #endif -_GL_INLINE_HEADER_END - -/* Taken from count-one-bits.h. */ -#include <limits.h> -#include <stdlib.h> - -_GL_INLINE_HEADER_BEGIN -#ifndef COUNT_ONE_BITS_INLINE -# define COUNT_ONE_BITS_INLINE _GL_INLINE -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -/* Assuming the GCC builtin is GCC_BUILTIN and the MSC builtin is MSC_BUILTIN, - expand to code that computes the number of 1-bits of the local - variable 'x' of type TYPE (an unsigned integer type) and return it - from the current function. */ -#if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)) \ - || (__clang_major__ >= 4) -# define COUNT_ONE_BITS(GCC_BUILTIN, MSC_BUILTIN, TYPE) \ - return GCC_BUILTIN (x) +#ifdef _GL_STDBIT_HAS_BUILTIN_POPCOUNT +# define __gl_stdbit_popcount __builtin_popcount +# define __gl_stdbit_popcountl __builtin_popcountl +# define __gl_stdbit_popcountll __builtin_popcountll #else - -/* Compute and return the number of 1-bits set in the least - significant 32 bits of X. */ -COUNT_ONE_BITS_INLINE int -count_one_bits_32 (unsigned int x) -{ - x = ((x & 0xaaaaaaaaU) >> 1) + (x & 0x55555555U); - x = ((x & 0xccccccccU) >> 2) + (x & 0x33333333U); - x = (x >> 16) + (x & 0xffff); - x = ((x & 0xf0f0) >> 4) + (x & 0x0f0f); - return (x >> 8) + (x & 0x00ff); -} - -/* Expand to code that computes the number of 1-bits of the local - variable 'x' of type TYPE (an unsigned integer type) and return it - from the current function. */ -# define COUNT_ONE_BITS_GENERIC(TYPE) \ - do \ - { \ - int count = 0; \ - int bits; \ - for (bits = 0; bits < sizeof (TYPE) * CHAR_BIT; bits += 32) \ - { \ - count += count_one_bits_32 (x); \ - x = x >> 31 >> 1; \ - } \ - return count; \ - } \ - while (0) - -# if 1500 <= _MSC_VER && (defined _M_IX86 || defined _M_X64) - -/* While gcc falls back to its own generic code if the machine - on which it's running doesn't support popcount, with Microsoft's - compiler we need to detect and fallback ourselves. */ - -# if 0 -# include <intrin.h> -# else - /* Don't pollute the namespace with too many MSVC intrinsics. */ +/* Count the number of 1 bits in N. */ +_GL_STDBIT_INLINE int +__gl_stdbit_popcount255 (unsigned long long int n) +{ + unsigned long long int + max = -1ull, + x555555 = max / (1 << 1 | 1), /* 0x555555... */ + x333333 = max / (1 << 2 | 1), /* 0x333333... */ + x0f0f0f = max / (1 << 4 | 1), /* 0x0f0f0f... */ + x010101 = max / ((1 << 8) - 1); /* 0x010101... */ + n -= (n >> 1) & x555555; + n = (n & x333333) + ((n >> 2) & x333333); + n = (n + (n >> 4)) & x0f0f0f; + + /* The multiplication means the popcount should fit in the leading 8 + bits of the product, so N should be narrower than 256 bits. */ + static_assert (8 * sizeof n < 1 << 8); + return n * x010101 >> 8 * (sizeof n - 1); +} + +# ifdef _MSC_VER +# if 1500 <= _MSC_VER && (defined _M_IX86 || defined _M_X64) # pragma intrinsic (__cpuid) # pragma intrinsic (__popcnt) -# if defined _M_X64 +# ifdef _M_X64 # pragma intrinsic (__popcnt64) -# endif -# endif - -# if !defined _M_X64 -static inline __popcnt64 (unsigned long long x) +# else +_GL_STDBIT_INLINE int +__popcnt64 (unsigned long long int n) { - return __popcnt ((unsigned int) (x >> 32)) + __popcnt ((unsigned int) x); + return __popcnt (n >> 32) + __popcnt (n); } +# endif # endif -/* Return nonzero if popcount is supported. */ - -/* 1 if supported, 0 if not supported, -1 if unknown. */ -extern int popcount_support; +/* 1 if supported, -1 if not, 0 if unknown. */ +extern signed char __gl_stdint_popcount_support; -COUNT_ONE_BITS_INLINE int -popcount_supported (void) +_GL_STDBIT_INLINE bool +__gl_stdbit_popcount_supported (void) { - if (popcount_support < 0) + if (!__gl_stdbit_popcount_support) { /* Do as described in - <https://docs.microsoft.com/en-us/cpp/intrinsics/popcnt16-popcnt-popcnt64> */ + <https://docs.microsoft.com/en-us/cpp/intrinsics/popcnt16-popcnt-popcnt64> + Although Microsoft started requiring POPCNT in MS-Windows 11 24H2, + we'll be more cautious. */ int cpu_info[4]; __cpuid (cpu_info, 1); - popcount_support = (cpu_info[2] >> 23) & 1; + __gl_stdbit_popcount_support = cpu_info[2] & 1 << 23 ? 1 : -1; } - return popcount_support; + return 0 < __gl_stdbit_popcount_support; } - -# define COUNT_ONE_BITS(GCC_BUILTIN, MSC_BUILTIN, TYPE) \ - do \ - { \ - if (popcount_supported ()) \ - return MSC_BUILTIN (x); \ - else \ - COUNT_ONE_BITS_GENERIC (TYPE); \ - } \ - while (0) - -# else - -# define COUNT_ONE_BITS(GCC_BUILTIN, MSC_BUILTIN, TYPE) \ - COUNT_ONE_BITS_GENERIC (TYPE) - -# endif -#endif - -/* Compute and return the number of 1-bits set in X. */ -COUNT_ONE_BITS_INLINE int -count_one_bits (unsigned int x) +_GL_STDBIT_INLINE int +__gl_stdbit_popcount (unsigned int n) { - COUNT_ONE_BITS (__builtin_popcount, __popcnt, unsigned int); + return (__gl_stdbit_popcount_supported () + ? __popcnt (n) + : __gl_stdbit_popcount255 (n)); } - -/* Compute and return the number of 1-bits set in X. */ -COUNT_ONE_BITS_INLINE int -count_one_bits_l (unsigned long int x) +_GL_STDBIT_INLINE int +__gl_stdbit_popcountl (unsigned long int n) { - COUNT_ONE_BITS (__builtin_popcountl, __popcnt, unsigned long int); + return (__gl_stdbit_popcount_supported () + ? __popcnt (n) + : __gl_stdbit_popcount255 (n)); } - -/* Compute and return the number of 1-bits set in X. */ -COUNT_ONE_BITS_INLINE int -count_one_bits_ll (unsigned long long int x) +_GL_STDBIT_INLINE int +__gl_stdbit_popcountll (unsigned long long int n) { - COUNT_ONE_BITS (__builtin_popcountll, __popcnt64, unsigned long long int); + return (__gl_stdbit_popcount_supported () + ? __popcnt64 (n) + : __gl_stdbit_popcount255 (n)); } - -#ifdef __cplusplus -} -#endif - -_GL_INLINE_HEADER_END - - -/* An expression, preferably with the type of A, that has the value of B. */ -#if ((defined __GNUC__ && 2 <= __GNUC__) \ - || (defined __clang_major__ && 4 <= __clang_major__) \ - || (defined __IBMC__ && 1210 <= __IBMC__ && defined __IBM__TYPEOF__) \ - || (defined __SUNPRO_C && 0x5110 <= __SUNPRO_C && !__STDC__)) -# define _GL_STDBIT_TYPEOF_CAST(a, b) ((__typeof__ (a)) (b)) -#elif 202311 <= __STDC_VERSION__ -# define _GL_STDBIT_TYPEOF_CAST(a, b) ((typeof (a)) (b)) -#else -/* This platform is so old that it lacks typeof, so _Generic is likely - missing or unreliable. The C23 standard seems to allow yielding B - (which is always unsigned long long int), so do that. */ -# define _GL_STDBIT_TYPEOF_CAST(a, b) (b) -#endif - -#define __STDC_VERSION_STDBIT_H__ 202311L - -#define __STDC_ENDIAN_BIG__ 4321 -#define __STDC_ENDIAN_LITTLE__ 1234 -#ifdef WORDS_BIGENDIAN -# define __STDC_ENDIAN_NATIVE__ __STDC_ENDIAN_BIG__ -#else -# define __STDC_ENDIAN_NATIVE__ __STDC_ENDIAN_LITTLE__ -#endif - -_GL_INLINE_HEADER_BEGIN -#ifndef _GL_STDBIT_INLINE -# define _GL_STDBIT_INLINE _GL_INLINE -#endif - -#ifdef __cplusplus -extern "C" { +# else /* !_MSC_VER */ +# define __gl_stdbit_popcount __gl_stdbit_popcount255 +# define __gl_stdbit_popcountl __gl_stdbit_popcount255 +# define __gl_stdbit_popcountll __gl_stdbit_popcount255 +# endif #endif _GL_STDBIT_INLINE unsigned int stdc_leading_zeros_ui (unsigned int n) { - return count_leading_zeros (n); + return n ? __gl_stdbit_clz (n) : 8 * sizeof n; } _GL_STDBIT_INLINE unsigned int @@ -436,13 +313,13 @@ stdc_leading_zeros_us (unsigned short int n) _GL_STDBIT_INLINE unsigned int stdc_leading_zeros_ul (unsigned long int n) { - return count_leading_zeros_l (n); + return n ? __gl_stdbit_clzl (n) : 8 * sizeof n; } _GL_STDBIT_INLINE unsigned int stdc_leading_zeros_ull (unsigned long long int n) { - return count_leading_zeros_ll (n); + return n ? __gl_stdbit_clzll (n) : 8 * sizeof n; } #define stdc_leading_zeros(n) \ @@ -494,7 +371,7 @@ stdc_leading_ones_ull (unsigned long long int n) _GL_STDBIT_INLINE unsigned int stdc_trailing_zeros_ui (unsigned int n) { - return count_trailing_zeros (n); + return n ? __gl_stdbit_ctz (n) : 8 * sizeof n; } _GL_STDBIT_INLINE unsigned int @@ -512,13 +389,13 @@ stdc_trailing_zeros_us (unsigned short int n) _GL_STDBIT_INLINE unsigned int stdc_trailing_zeros_ul (unsigned long int n) { - return count_trailing_zeros_l (n); + return n ? __gl_stdbit_ctzl (n) : 8 * sizeof n; } _GL_STDBIT_INLINE unsigned int stdc_trailing_zeros_ull (unsigned long long int n) { - return count_trailing_zeros_ll (n); + return n ? __gl_stdbit_ctzll (n) : 8 * sizeof n; } #define stdc_trailing_zeros(n) \ @@ -742,7 +619,7 @@ stdc_first_trailing_one_ull (unsigned long long int n) _GL_STDBIT_INLINE unsigned int stdc_count_ones_ui (unsigned int n) { - return count_one_bits (n); + return __gl_stdbit_popcount (n); } _GL_STDBIT_INLINE unsigned int @@ -760,13 +637,13 @@ stdc_count_ones_us (unsigned short int n) _GL_STDBIT_INLINE unsigned int stdc_count_ones_ul (unsigned long int n) { - return count_one_bits_l (n); + return __gl_stdbit_popcountl (n); } _GL_STDBIT_INLINE unsigned int stdc_count_ones_ull (unsigned long long int n) { - return count_one_bits_ll (n); + return __gl_stdbit_popcountll (n); } #define stdc_count_ones(n) \ diff --git a/modules/stdbit b/modules/stdbit index f70386566b..e8144ab71a 100644 --- a/modules/stdbit +++ b/modules/stdbit @@ -7,6 +7,7 @@ lib/stdbit.in.h m4/stdbit_h.m4 Depends-on: +assert-h [$GL_GENERATE_STDBIT_H] stdbool [$GL_GENERATE_STDBIT_H] configure.ac: -- 2.44.0