On 2019-Feb-14, Tom Lane wrote: > static inline int > pg_clz(...)
Hmm, I missed this bit. So we put all these functions in the header, as in the attached. -- Álvaro Herrera https://www.2ndQuadrant.com/ PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
commit d4265b36754720f85bf2ac6fd9ae2c58b8e1abc2 Author: Alvaro Herrera <alvhe...@alvh.no-ip.org> AuthorDate: Thu Feb 14 15:18:03 2019 -0300 CommitDate: Thu Feb 14 19:41:41 2019 -0300 fix popcount etc diff --git a/src/include/port/pg_bitutils.h b/src/include/port/pg_bitutils.h index 148c5550573..72dfd1d2695 100644 --- a/src/include/port/pg_bitutils.h +++ b/src/include/port/pg_bitutils.h @@ -10,17 +10,175 @@ * *------------------------------------------------------------------------ - */ - #ifndef PG_BITUTILS_H #define PG_BITUTILS_H -extern int (*pg_popcount32) (uint32 word); -extern int (*pg_popcount64) (uint64 word); -extern int (*pg_rightmost_one32) (uint32 word); -extern int (*pg_rightmost_one64) (uint64 word); -extern int (*pg_leftmost_one32) (uint32 word); -extern int (*pg_leftmost_one64) (uint64 word); - +extern int (*pg_popcount32) (uint32 word); +extern int (*pg_popcount64) (uint64 word); extern uint64 pg_popcount(const char *buf, int bytes); +/* in pg_popcount.c */ +extern int pg_popcount32_sse42(uint32 word); +extern int pg_popcount64_sse42(uint64 word); + + +#ifndef HAVE__BUILTIN_CTZ +/* + * Array marking the position of the right-most set bit for each value of + * 1-255. We count the right-most position as the 0th bit, and the + * left-most the 7th bit. The 0th index of the array must not be used. + */ +static const uint8 rightmost_one_pos[256] = { + 0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 +}; +#endif /* !HAVE__BUILTIN_CTZ */ + +/* + * pg_rightmost_one32 + * Returns the number of trailing 0-bits in word, starting at the least + * significant bit position. word must not be 0. + */ +static inline int +pg_rightmost_one32(uint32 word) +{ + int result = 0; + + Assert(word != 0); + +#ifdef HAVE__BUILTIN_CTZ + result = __builtin_ctz(word); +#else + while ((word & 255) == 0) + { + word >>= 8; + result += 8; + } + result += rightmost_one_pos[word & 255]; +#endif /* HAVE__BUILTIN_CTZ */ + + return result; +} + +/* + * pg_rightmost_one64 + * Returns the number of trailing 0-bits in word, starting at the least + * significant bit position. word must not be 0. + */ +static inline int +pg_rightmost_one64(uint64 word) +{ + int result = 0; + + Assert(word != 0); + +#ifdef HAVE__BUILTIN_CTZ +#if defined(HAVE_LONG_INT_64) + return __builtin_ctzl(word); +#elif defined(HAVE_LONG_LONG_INT_64) + return __builtin_ctzll(word); +#else +#error must have a working 64-bit integer datatype +#endif +#else /* HAVE__BUILTIN_CTZ */ + while ((word & 255) == 0) + { + word >>= 8; + result += 8; + } + result += rightmost_one_pos[word & 255]; +#endif + + return result; +} + +#ifndef HAVE__BUILTIN_CLZ +/* + * Array marking the position of the left-most set bit for each value of + * 1-255. We count the right-most position as the 0th bit, and the + * left-most the 7th bit. The 0th index of the array must not be used. + */ +static const uint8 leftmost_one_pos[256] = { + 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 +}; +#endif /* !HAVE_BUILTIN_CLZ */ + +/* + * pg_leftmost_one32 + * Returns the 0-based position of the most significant set bit in word + * measured from the least significant bit. word must not be 0. + */ +static inline int +pg_leftmost_one32(uint32 word) +{ +#ifdef HAVE__BUILTIN_CLZ + return 31 - __builtin_clz(word); +#else + int shift = 32 - 8; + + Assert(word != 0); + + while ((word >> shift) == 0) + shift -= 8; + + return shift + leftmost_one_pos[(word >> shift) & 255]; +#endif /* HAVE__BUILTIN_CLZ */ +} + +/* + * pg_leftmost_one64 + * Returns the 0-based position of the most significant set bit in word + * measured from the least significant bit. word must not be 0. + */ +static inline int +pg_leftmost_one64(uint64 word) +{ +#ifdef HAVE__BUILTIN_CLZ +#if defined(HAVE_LONG_INT_64) + return 63 - __builtin_clzl(word); +#elif defined(HAVE_LONG_LONG_INT_64) + return 63 - __builtin_clzll(word); +#else +#error must have a working 64-bit integer datatype +#endif +#else /* HAVE__BUILTIN_CLZ */ + int shift = 64 - 8; + + Assert(word != 0); + while ((word >> shift) == 0) + shift -= 8; + + return shift + leftmost_one_pos[(word >> shift) & 255]; +#endif /* !HAVE__BUIILTIN_CLZ */ +} + #endif /* PG_BITUTILS_H */ diff --git a/src/port/Makefile b/src/port/Makefile index 2da73260a13..d7290573c65 100644 --- a/src/port/Makefile +++ b/src/port/Makefile @@ -41,6 +41,13 @@ OBJS = $(LIBOBJS) $(PG_CRC32C_OBJS) chklocale.o erand48.o inet_net_ntop.o \ qsort.o qsort_arg.o quotes.o snprintf.o sprompt.o strerror.o \ tar.o thread.o +# If the compiler supports a flag for the POPCOUNT instruction, we compile +# pg_popcount.o with it. (Whether to actually use the functions therein is +# determined at runtime by testing CPUID flags.) +ifneq ($(CFLAGS_POPCNT),) +OBJS += pg_popcount.o +endif + # libpgport.a, libpgport_shlib.a, and libpgport_srv.a contain the same files # foo.o, foo_shlib.o, and foo_srv.o are all built from foo.c OBJS_SHLIB = $(OBJS:%.o=%_shlib.o) @@ -78,10 +85,10 @@ pg_crc32c_armv8.o: CFLAGS+=$(CFLAGS_ARMV8_CRC32C) pg_crc32c_armv8_shlib.o: CFLAGS+=$(CFLAGS_ARMV8_CRC32C) pg_crc32c_armv8_srv.o: CFLAGS+=$(CFLAGS_ARMV8_CRC32C) -# pg_bitutils.c needs CFLAGS_POPCNT -pg_bitutils.o: CFLAGS+=$(CFLAGS_POPCNT) -pg_bitutils_shlib.o: CFLAGS+=$(CFLAGS_POPCNT) -pg_bitutils_srv.o: CFLAGS+=$(CFLAGS_POPCNT) +# pg_popcount.c needs CFLAGS_POPCNT +pg_popcount.o: CFLAGS+=$(CFLAGS_POPCNT) +pg_popcount_shlib.o: CFLAGS+=$(CFLAGS_POPCNT) +pg_popcount_srv.o: CFLAGS+=$(CFLAGS_POPCNT) # # Shared library versions of object files diff --git a/src/port/pg_bitutils.c b/src/port/pg_bitutils.c index aac394fe927..3d26883111f 100644 --- a/src/port/pg_bitutils.c +++ b/src/port/pg_bitutils.c @@ -10,7 +10,6 @@ * *------------------------------------------------------------------------- */ - #include "postgres.h" #ifdef HAVE__GET_CPUID @@ -23,61 +22,21 @@ #include "port/pg_bitutils.h" -#if defined(HAVE__BUILTIN_POPCOUNT) && defined(HAVE__GET_CPUID) +#ifdef HAVE__BUILTIN_POPCOUNT static bool pg_popcount_available(void); -static int pg_popcount32_choose(uint32 word); -static int pg_popcount32_sse42(uint32 word); -static int pg_popcount64_choose(uint64 word); -static int pg_popcount64_sse42(uint64 word); -#endif -static int pg_popcount32_slow(uint32 word); -static int pg_popcount64_slow(uint64 word); - -#if defined(HAVE__GET_CPUID) && (defined(HAVE__BUILTIN_CTZ) || defined(HAVE__BUILTIN_CLZ)) -static bool pg_lzcnt_available(void); -#endif - -#if defined(HAVE__BUILTIN_CTZ) && defined(HAVE__GET_CPUID) -static int pg_rightmost_one32_choose(uint32 word); -static int pg_rightmost_one32_abm(uint32 word); -static int pg_rightmost_one64_choose(uint64 word); -static int pg_rightmost_one64_abm(uint64 word); -#endif -static int pg_rightmost_one32_slow(uint32 word); -static int pg_rightmost_one64_slow(uint64 word); - -#if defined(HAVE__BUILTIN_CLZ) && defined(HAVE__GET_CPUID) -static int pg_leftmost_one32_choose(uint32 word); -static int pg_leftmost_one32_abm(uint32 word); -static int pg_leftmost_one64_choose(uint64 word); -static int pg_leftmost_one64_abm(uint64 word); -#endif -static int pg_leftmost_one32_slow(uint32 word); -static int pg_leftmost_one64_slow(uint64 word); - -#if defined(HAVE__BUILTIN_POPCOUNT) && defined(HAVE__GET_CPUID) -int (*pg_popcount32) (uint32 word) = pg_popcount32_choose; -int (*pg_popcount64) (uint64 word) = pg_popcount64_choose; +static int pg_popcount32_choose(uint32 word); +static int pg_popcount32_builtin(uint32 word); +static int pg_popcount64_choose(uint64 word); +static int pg_popcount64_builtin(uint64 word); +int (*pg_popcount32) (uint32 word) = pg_popcount32_choose; +int (*pg_popcount64) (uint64 word) = pg_popcount64_choose; #else -int (*pg_popcount32) (uint32 word) = pg_popcount32_slow; -int (*pg_popcount64) (uint64 word) = pg_popcount64_slow; -#endif +static int pg_popcount32_slow(uint32 word); +static int pg_popcount64_slow(uint64 word); +int (*pg_popcount32) (uint32 word) = pg_popcount32_slow; +int (*pg_popcount64) (uint64 word) = pg_popcount64_slow; +#endif /* !HAVE_BUILTIN_POPCOUNT */ -#if defined(HAVE__BUILTIN_CTZ) && defined(HAVE__GET_CPUID) -int (*pg_rightmost_one32) (uint32 word) = pg_rightmost_one32_choose; -int (*pg_rightmost_one64) (uint64 word) = pg_rightmost_one64_choose; -#else -int (*pg_rightmost_one32) (uint32 word) = pg_rightmost_one32_slow; -int (*pg_rightmost_one64) (uint64 word) = pg_rightmost_one64_slow; -#endif - -#if defined(HAVE__BUILTIN_CLZ) && defined(HAVE__GET_CPUID) -int (*pg_leftmost_one32) (uint32 word) = pg_leftmost_one32_choose; -int (*pg_leftmost_one64) (uint64 word) = pg_leftmost_one64_choose; -#else -int (*pg_leftmost_one32) (uint32 word) = pg_leftmost_one32_slow; -int (*pg_leftmost_one64) (uint64 word) = pg_leftmost_one64_slow; -#endif /* Array marking the number of 1-bits for each value of 0-255. */ static const uint8 number_of_ones[256] = { @@ -100,77 +59,33 @@ static const uint8 number_of_ones[256] = { }; /* - * Array marking the position of the right-most set bit for each value of - * 1-255. We count the right-most position as the 0th bit, and the - * left-most the 7th bit. The 0th index of the array must not be used. + * Return true iff we have CPUID support and it indicates that the POPCNT + * instruction is available. */ -static const uint8 rightmost_one_pos[256] = { - 0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 -}; - -/* - * Array marking the position of the left-most set bit for each value of - * 1-255. We count the right-most position as the 0th bit, and the - * left-most the 7th bit. The 0th index of the array must not be used. - */ -static const uint8 leftmost_one_pos[256] = { - 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 -}; - -#if defined(HAVE__GET_CPUID) && defined(HAVE__BUILTIN_POPCOUNT) - static bool pg_popcount_available(void) { - unsigned int exx[4] = { 0, 0, 0, 0 }; +#if defined(HAVE__GET_CPUID) || defined(HAVE__CPUID) + unsigned int exx[4] = {0, 0, 0, 0}; #if defined(HAVE__GET_CPUID) __get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]); #elif defined(HAVE__CPUID) __cpuid(exx, 1); -#else -#error cpuid instruction not available #endif return (exx[2] & (1 << 23)) != 0; /* POPCNT */ -} +#else /* HAVE__GET_CPUID || HAVE__CPUID */ + + return false; #endif +} -#if defined(HAVE__GET_CPUID) && defined(HAVE__BUILTIN_POPCOUNT) - +#ifdef HAVE__BUILTIN_POPCOUNT /* - * This gets called on the first call. It replaces the function pointer - * so that subsequent calls are routed directly to the chosen implementation. + * This gets called on the first call to pg_popcount32. It replaces the + * function pointer so that subsequent calls are routed directly to the chosen + * implementation. */ static int pg_popcount32_choose(uint32 word) @@ -178,18 +93,17 @@ pg_popcount32_choose(uint32 word) if (pg_popcount_available()) pg_popcount32 = pg_popcount32_sse42; else - pg_popcount32 = pg_popcount32_slow; + pg_popcount32 = pg_popcount32_builtin; return pg_popcount32(word); } static int -pg_popcount32_sse42(uint32 word) +pg_popcount32_builtin(uint32 word) { return __builtin_popcount(word); } -#endif - +#else /* HAVE__BUILTIN_POPCOUNT */ /* * pg_popcount32_slow * Return the number of 1 bits set in word @@ -197,7 +111,7 @@ pg_popcount32_sse42(uint32 word) static int pg_popcount32_slow(uint32 word) { - int result = 0; + int result = 0; while (word != 0) { @@ -207,6 +121,7 @@ pg_popcount32_slow(uint32 word) return result; } +#endif /* * pg_popcount @@ -215,13 +130,13 @@ pg_popcount32_slow(uint32 word) uint64 pg_popcount(const char *buf, int bytes) { - uint64 popcnt = 0; + uint64 popcnt = 0; #if SIZEOF_VOID_P >= 8 /* Process in 64-bit chunks if the buffer is aligned. */ if (buf == (char *) TYPEALIGN(8, buf)) { - uint64 *words = (uint64 *) buf; + uint64 *words = (uint64 *) buf; while (bytes >= 8) { @@ -235,7 +150,7 @@ pg_popcount(const char *buf, int bytes) /* Process in 32-bit chunks if the buffer is aligned. */ if (buf == (char *) TYPEALIGN(4, buf)) { - uint32 *words = (uint32 *) buf; + uint32 *words = (uint32 *) buf; while (bytes >= 4) { @@ -254,11 +169,11 @@ pg_popcount(const char *buf, int bytes) return popcnt; } -#if defined(HAVE__GET_CPUID) && defined(HAVE__BUILTIN_POPCOUNT) - +#ifdef HAVE__BUILTIN_POPCOUNT /* - * This gets called on the first call. It replaces the function pointer - * so that subsequent calls are routed directly to the chosen implementation. + * This gets called on the first call to pg_popcount64. It replaces the + * function pointer so that subsequent calls are routed directly to the chosen + * implementation. */ static int pg_popcount64_choose(uint64 word) @@ -266,26 +181,24 @@ pg_popcount64_choose(uint64 word) if (pg_popcount_available()) pg_popcount64 = pg_popcount64_sse42; else - pg_popcount64 = pg_popcount64_slow; + pg_popcount64 = pg_popcount64_builtin; return pg_popcount64(word); } static int -pg_popcount64_sse42(uint64 word) +pg_popcount64_builtin(uint64 word) { #if defined(HAVE_LONG_INT_64) return __builtin_popcountl(word); #elif defined(HAVE_LONG_LONG_INT_64) return __builtin_popcountll(word); #else - /* shouldn't happen */ #error must have a working 64-bit integer datatype #endif } -#endif - +#else /* HAVE__BUILTIN_POPCOUNT */ /* * pg_popcount64_slow * Return the number of 1 bits set in word @@ -293,7 +206,7 @@ pg_popcount64_sse42(uint64 word) static int pg_popcount64_slow(uint64 word) { - int result = 0; + int result = 0; while (word != 0) { @@ -303,211 +216,4 @@ pg_popcount64_slow(uint64 word) return result; } - -#if defined(HAVE__GET_CPUID) && (defined(HAVE__BUILTIN_CTZ) || defined(HAVE__BUILTIN_CLZ)) - -static bool -pg_lzcnt_available(void) -{ - - unsigned int exx[4] = { 0, 0, 0, 0 }; - -#if defined(HAVE__GET_CPUID) - __get_cpuid(0x80000001, &exx[0], &exx[1], &exx[2], &exx[3]); -#elif defined(HAVE__CPUID) - __cpuid(exx, 0x80000001); -#else -#error cpuid instruction not available #endif - - return (exx[2] & (1 << 5)) != 0; /* LZCNT */ -} -#endif - -#if defined(HAVE__GET_CPUID) && defined(HAVE__BUILTIN_CTZ) -/* - * This gets called on the first call. It replaces the function pointer - * so that subsequent calls are routed directly to the chosen implementation. - */ -static int -pg_rightmost_one32_choose(uint32 word) -{ - if (pg_lzcnt_available()) - pg_rightmost_one32 = pg_rightmost_one32_abm; - else - pg_rightmost_one32 = pg_rightmost_one32_slow; - - return pg_rightmost_one32(word); -} - -static int -pg_rightmost_one32_abm(uint32 word) -{ - return __builtin_ctz(word); -} - -#endif - -/* - * pg_rightmost_one32_slow - * Returns the number of trailing 0-bits in word, starting at the least - * significant bit position. word must not be 0. - */ -static int -pg_rightmost_one32_slow(uint32 word) -{ - int result = 0; - - Assert(word != 0); - - while ((word & 255) == 0) - { - word >>= 8; - result += 8; - } - result += rightmost_one_pos[word & 255]; - - return result; -} - -#if defined(HAVE__GET_CPUID) && defined(HAVE__BUILTIN_CTZ) -/* - * This gets called on the first call. It replaces the function pointer - * so that subsequent calls are routed directly to the chosen implementation. - */ -static int -pg_rightmost_one64_choose(uint64 word) -{ - if (pg_lzcnt_available()) - pg_rightmost_one64 = pg_rightmost_one64_abm; - else - pg_rightmost_one64 = pg_rightmost_one64_slow; - - return pg_rightmost_one64(word); -} - -static int -pg_rightmost_one64_abm(uint64 word) -{ -#if defined(HAVE_LONG_INT_64) - return __builtin_ctzl(word); -#elif defined(HAVE_LONG_LONG_INT_64) - return __builtin_ctzll(word); -#else - /* shouldn't happen */ -#error must have a working 64-bit integer datatype -#endif -} -#endif - -/* - * pg_rightmost_one64_slow - * Returns the number of trailing 0-bits in word, starting at the least - * significant bit position. word must not be 0. - */ -static int -pg_rightmost_one64_slow(uint64 word) -{ - int result = 0; - - Assert(word != 0); - - while ((word & 255) == 0) - { - word >>= 8; - result += 8; - } - result += rightmost_one_pos[word & 255]; - - return result; -} - -#if defined(HAVE__GET_CPUID) && defined(HAVE__BUILTIN_CLZ) -/* - * This gets called on the first call. It replaces the function pointer - * so that subsequent calls are routed directly to the chosen implementation. - */ -static int -pg_leftmost_one32_choose(uint32 word) -{ - if (pg_lzcnt_available()) - pg_leftmost_one32 = pg_leftmost_one32_abm; - else - pg_leftmost_one32 = pg_leftmost_one32_slow; - - return pg_leftmost_one32(word); -} - -static int -pg_leftmost_one32_abm(uint32 word) -{ - return 31 - __builtin_clz(word); -} -#endif - -/* - * pg_leftmost_one32_slow - * Returns the 0-based position of the most significant set bit in word - * measured from the least significant bit. word must not be 0. - */ -static int -pg_leftmost_one32_slow(uint32 word) -{ - int shift = 32 - 8; - - Assert(word != 0); - - while ((word >> shift) == 0) - shift -= 8; - - return shift + leftmost_one_pos[(word >> shift) & 255]; -} - -#if defined(HAVE__GET_CPUID) && defined(HAVE__BUILTIN_CLZ) -/* - * This gets called on the first call. It replaces the function pointer - * so that subsequent calls are routed directly to the chosen implementation. - */ -static int -pg_leftmost_one64_choose(uint64 word) -{ - if (pg_lzcnt_available()) - pg_leftmost_one64 = pg_leftmost_one64_abm; - else - pg_leftmost_one64 = pg_leftmost_one64_slow; - - return pg_leftmost_one64(word); -} - -static int -pg_leftmost_one64_abm(uint64 word) -{ -#if defined(HAVE_LONG_INT_64) - return 63 - __builtin_clzl(word); -#elif defined(HAVE_LONG_LONG_INT_64) - return 63 - __builtin_clzll(word); -#else - /* shouldn't happen */ -#error must have a working 64-bit integer datatype -#endif - -} -#endif - -/* - * pg_leftmost_one64_slow - * Returns the 0-based position of the most significant set bit in word - * measured from the least significant bit. word must not be 0. - */ -static int -pg_leftmost_one64_slow(uint64 word) -{ - int shift = 64 - 8; - - Assert(word != 0); - - while ((word >> shift) == 0) - shift -= 8; - - return shift + leftmost_one_pos[(word >> shift) & 255]; -} diff --git a/src/port/pg_popcount.c b/src/port/pg_popcount.c new file mode 100644 index 00000000000..5254c41273f --- /dev/null +++ b/src/port/pg_popcount.c @@ -0,0 +1,36 @@ +/*------------------------------------------------------------------------- + * + * pg_popcount.c + * CPU-optimized implementation of pg_popcount + * + * This file must be compiled with a compiler-specific flag to enable the + * POPCOUNT instruction. + * + * Portions Copyright (c) 2019, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/port/pg_popcount.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "port/pg_bitutils.h" + +int +pg_popcount32_sse42(uint32 word) +{ + return __builtin_popcount(word); +} + +int +pg_popcount64_sse42(uint64 word) +{ +#if defined(HAVE_LONG_INT_64) + return __builtin_popcountl(word); +#elif defined(HAVE_LONG_LONG_INT_64) + return __builtin_popcountll(word); +#else +#error must have a working 64-bit integer datatype +#endif +}