On 2019-Feb-14, Tom Lane wrote: > Some further thoughts here ... > > Does the "lzcnt" runtime probe actually do anything useful? > On the x86_64 compilers I tried (gcc 8.2.1 and 4.4.7), __builtin_clz > and __builtin_ctz compile to sequences involving bsrq and bsfq > regardless of -mpopcnt. It's fairly hard to see how lzcnt would > buy anything over those sequences even if there were zero overhead > involved in using it.
Hah, I just realized you have to add -mlzcnt in order for these builtins to use the lzcnt instructions. It goes from something like bsrq %rax, %rax xorq $63, %rax to lzcntq %rax, %rax Significant? I have this patch now, written before I realized the above; I'll augment it to cater to this (adding -mlzcnt and a new set of functions -- perhaps a new file "lzcnt.c" or maybe put the lot in pg_popcount.c and rename it?) and resubmit after an errand I have to run now. -- Álvaro Herrera https://www.2ndQuadrant.com/ PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
commit 477cc89802effed5d80d4a82891c7ef3b5e61e63 Author: Alvaro Herrera <alvhe...@alvh.no-ip.org> AuthorDate: Thu Feb 14 15:18:03 2019 -0300 CommitDate: Thu Feb 14 15:31:32 2019 -0300 fix popcount etc diff --git a/src/include/port/pg_bitutils.h b/src/include/port/pg_bitutils.h index 148c5550573..53787a7ef32 100644 --- a/src/include/port/pg_bitutils.h +++ b/src/include/port/pg_bitutils.h @@ -23,4 +23,8 @@ extern int (*pg_leftmost_one64) (uint64 word); extern uint64 pg_popcount(const char *buf, int bytes); +/* in pg_popcount.c */ +extern int pg_popcount32_sse42(uint32 word); +extern int pg_popcount64_sse42(uint64 word); + #endif /* PG_BITUTILS_H */ diff --git a/src/port/Makefile b/src/port/Makefile index 2da73260a13..d7290573c65 100644 --- a/src/port/Makefile +++ b/src/port/Makefile @@ -41,6 +41,13 @@ OBJS = $(LIBOBJS) $(PG_CRC32C_OBJS) chklocale.o erand48.o inet_net_ntop.o \ qsort.o qsort_arg.o quotes.o snprintf.o sprompt.o strerror.o \ tar.o thread.o +# If the compiler supports a flag for the POPCOUNT instruction, we compile +# pg_popcount.o with it. (Whether to actually use the functions therein is +# determined at runtime by testing CPUID flags.) +ifneq ($(CFLAGS_POPCNT),) +OBJS += pg_popcount.o +endif + # libpgport.a, libpgport_shlib.a, and libpgport_srv.a contain the same files # foo.o, foo_shlib.o, and foo_srv.o are all built from foo.c OBJS_SHLIB = $(OBJS:%.o=%_shlib.o) @@ -78,10 +85,10 @@ pg_crc32c_armv8.o: CFLAGS+=$(CFLAGS_ARMV8_CRC32C) pg_crc32c_armv8_shlib.o: CFLAGS+=$(CFLAGS_ARMV8_CRC32C) pg_crc32c_armv8_srv.o: CFLAGS+=$(CFLAGS_ARMV8_CRC32C) -# pg_bitutils.c needs CFLAGS_POPCNT -pg_bitutils.o: CFLAGS+=$(CFLAGS_POPCNT) -pg_bitutils_shlib.o: CFLAGS+=$(CFLAGS_POPCNT) -pg_bitutils_srv.o: CFLAGS+=$(CFLAGS_POPCNT) +# pg_popcount.c needs CFLAGS_POPCNT +pg_popcount.o: CFLAGS+=$(CFLAGS_POPCNT) +pg_popcount_shlib.o: CFLAGS+=$(CFLAGS_POPCNT) +pg_popcount_srv.o: CFLAGS+=$(CFLAGS_POPCNT) # # Shared library versions of object files diff --git a/src/port/pg_bitutils.c b/src/port/pg_bitutils.c index aac394fe927..fc8de518791 100644 --- a/src/port/pg_bitutils.c +++ b/src/port/pg_bitutils.c @@ -10,7 +10,6 @@ * *------------------------------------------------------------------------- */ - #include "postgres.h" #ifdef HAVE__GET_CPUID @@ -23,61 +22,52 @@ #include "port/pg_bitutils.h" -#if defined(HAVE__BUILTIN_POPCOUNT) && defined(HAVE__GET_CPUID) +#ifdef HAVE__BUILTIN_POPCOUNT static bool pg_popcount_available(void); static int pg_popcount32_choose(uint32 word); -static int pg_popcount32_sse42(uint32 word); +static int pg_popcount32_builtin(uint32 word); static int pg_popcount64_choose(uint64 word); -static int pg_popcount64_sse42(uint64 word); -#endif -static int pg_popcount32_slow(uint32 word); -static int pg_popcount64_slow(uint64 word); - -#if defined(HAVE__GET_CPUID) && (defined(HAVE__BUILTIN_CTZ) || defined(HAVE__BUILTIN_CLZ)) -static bool pg_lzcnt_available(void); -#endif - -#if defined(HAVE__BUILTIN_CTZ) && defined(HAVE__GET_CPUID) -static int pg_rightmost_one32_choose(uint32 word); -static int pg_rightmost_one32_abm(uint32 word); -static int pg_rightmost_one64_choose(uint64 word); -static int pg_rightmost_one64_abm(uint64 word); -#endif -static int pg_rightmost_one32_slow(uint32 word); -static int pg_rightmost_one64_slow(uint64 word); - -#if defined(HAVE__BUILTIN_CLZ) && defined(HAVE__GET_CPUID) -static int pg_leftmost_one32_choose(uint32 word); -static int pg_leftmost_one32_abm(uint32 word); -static int pg_leftmost_one64_choose(uint64 word); -static int pg_leftmost_one64_abm(uint64 word); -#endif -static int pg_leftmost_one32_slow(uint32 word); -static int pg_leftmost_one64_slow(uint64 word); - -#if defined(HAVE__BUILTIN_POPCOUNT) && defined(HAVE__GET_CPUID) +static int pg_popcount64_builtin(uint64 word); int (*pg_popcount32) (uint32 word) = pg_popcount32_choose; int (*pg_popcount64) (uint64 word) = pg_popcount64_choose; #else +static int pg_popcount32_slow(uint32 word); +static int pg_popcount64_slow(uint64 word); int (*pg_popcount32) (uint32 word) = pg_popcount32_slow; int (*pg_popcount64) (uint64 word) = pg_popcount64_slow; #endif -#if defined(HAVE__BUILTIN_CTZ) && defined(HAVE__GET_CPUID) +#if defined(HAVE__BUILTIN_CTZ) || defined(HAVE__BUILTIN_CLZ) +static bool pg_lzcnt_available(void); +#endif + +#ifdef HAVE__BUILTIN_CTZ +static int pg_rightmost_one32_choose(uint32 word); +static int pg_rightmost_one32_abm(uint32 word); +static int pg_rightmost_one64_choose(uint64 word); +static int pg_rightmost_one64_abm(uint64 word); int (*pg_rightmost_one32) (uint32 word) = pg_rightmost_one32_choose; int (*pg_rightmost_one64) (uint64 word) = pg_rightmost_one64_choose; #else int (*pg_rightmost_one32) (uint32 word) = pg_rightmost_one32_slow; int (*pg_rightmost_one64) (uint64 word) = pg_rightmost_one64_slow; #endif +static int pg_rightmost_one32_slow(uint32 word); +static int pg_rightmost_one64_slow(uint64 word); -#if defined(HAVE__BUILTIN_CLZ) && defined(HAVE__GET_CPUID) +#ifdef HAVE__BUILTIN_CLZ +static int pg_leftmost_one32_choose(uint32 word); +static int pg_leftmost_one32_abm(uint32 word); +static int pg_leftmost_one64_choose(uint64 word); +static int pg_leftmost_one64_abm(uint64 word); int (*pg_leftmost_one32) (uint32 word) = pg_leftmost_one32_choose; int (*pg_leftmost_one64) (uint64 word) = pg_leftmost_one64_choose; #else int (*pg_leftmost_one32) (uint32 word) = pg_leftmost_one32_slow; int (*pg_leftmost_one64) (uint64 word) = pg_leftmost_one64_slow; #endif +static int pg_leftmost_one32_slow(uint32 word); +static int pg_leftmost_one64_slow(uint64 word); /* Array marking the number of 1-bits for each value of 0-255. */ static const uint8 number_of_ones[256] = { @@ -147,27 +137,27 @@ static const uint8 leftmost_one_pos[256] = { 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 }; -#if defined(HAVE__GET_CPUID) && defined(HAVE__BUILTIN_POPCOUNT) static bool pg_popcount_available(void) { +#if defined(HAVE__GET_CPUID) || defined(HAVE__CPUID) unsigned int exx[4] = { 0, 0, 0, 0 }; #if defined(HAVE__GET_CPUID) __get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]); #elif defined(HAVE__CPUID) __cpuid(exx, 1); -#else -#error cpuid instruction not available #endif return (exx[2] & (1 << 23)) != 0; /* POPCNT */ -} +#else /* HAVE__GET_CPUID || HAVE__CPUID */ + + return false; #endif +} -#if defined(HAVE__GET_CPUID) && defined(HAVE__BUILTIN_POPCOUNT) - +#ifdef HAVE__BUILTIN_POPCOUNT /* * This gets called on the first call. It replaces the function pointer * so that subsequent calls are routed directly to the chosen implementation. @@ -178,18 +168,19 @@ pg_popcount32_choose(uint32 word) if (pg_popcount_available()) pg_popcount32 = pg_popcount32_sse42; else - pg_popcount32 = pg_popcount32_slow; + pg_popcount32 = pg_popcount32_builtin; return pg_popcount32(word); } static int -pg_popcount32_sse42(uint32 word) +pg_popcount32_builtin(uint32 word) { return __builtin_popcount(word); } #endif +#ifndef HAVE__BUILTIN_POPCOUNT /* * pg_popcount32_slow * Return the number of 1 bits set in word @@ -207,6 +198,7 @@ pg_popcount32_slow(uint32 word) return result; } +#endif /* * pg_popcount @@ -254,8 +246,7 @@ pg_popcount(const char *buf, int bytes) return popcnt; } -#if defined(HAVE__GET_CPUID) && defined(HAVE__BUILTIN_POPCOUNT) - +#ifdef HAVE__BUILTIN_POPCOUNT /* * This gets called on the first call. It replaces the function pointer * so that subsequent calls are routed directly to the chosen implementation. @@ -266,26 +257,26 @@ pg_popcount64_choose(uint64 word) if (pg_popcount_available()) pg_popcount64 = pg_popcount64_sse42; else - pg_popcount64 = pg_popcount64_slow; + pg_popcount64 = pg_popcount64_builtin; return pg_popcount64(word); } static int -pg_popcount64_sse42(uint64 word) +pg_popcount64_builtin(uint64 word) { #if defined(HAVE_LONG_INT_64) return __builtin_popcountl(word); #elif defined(HAVE_LONG_LONG_INT_64) return __builtin_popcountll(word); #else - /* shouldn't happen */ #error must have a working 64-bit integer datatype #endif } -#endif +#endif /* HAVE__BUILTIN_POPCOUNT */ +#ifndef HAVE__BUILTIN_POPCOUNT /* * pg_popcount64_slow * Return the number of 1 bits set in word @@ -303,28 +294,28 @@ pg_popcount64_slow(uint64 word) return result; } - -#if defined(HAVE__GET_CPUID) && (defined(HAVE__BUILTIN_CTZ) || defined(HAVE__BUILTIN_CLZ)) +#endif static bool pg_lzcnt_available(void) { - +#if (defined(HAVE__GET_CPUID) || defined(HAVE__CPUID)) unsigned int exx[4] = { 0, 0, 0, 0 }; #if defined(HAVE__GET_CPUID) __get_cpuid(0x80000001, &exx[0], &exx[1], &exx[2], &exx[3]); #elif defined(HAVE__CPUID) __cpuid(exx, 0x80000001); -#else -#error cpuid instruction not available #endif return (exx[2] & (1 << 5)) != 0; /* LZCNT */ -} -#endif +#else /* HAVE__GET_CPUID || HAVE__CPUID */ -#if defined(HAVE__GET_CPUID) && defined(HAVE__BUILTIN_CTZ) + return false; +#endif +} + +#ifdef HAVE__BUILTIN_CTZ /* * This gets called on the first call. It replaces the function pointer * so that subsequent calls are routed directly to the chosen implementation. @@ -346,7 +337,7 @@ pg_rightmost_one32_abm(uint32 word) return __builtin_ctz(word); } -#endif +#endif /* HAVE__BUILTIN_CTZ */ /* * pg_rightmost_one32_slow @@ -370,7 +361,7 @@ pg_rightmost_one32_slow(uint32 word) return result; } -#if defined(HAVE__GET_CPUID) && defined(HAVE__BUILTIN_CTZ) +#ifdef HAVE__BUILTIN_CTZ /* * This gets called on the first call. It replaces the function pointer * so that subsequent calls are routed directly to the chosen implementation. @@ -394,11 +385,10 @@ pg_rightmost_one64_abm(uint64 word) #elif defined(HAVE_LONG_LONG_INT_64) return __builtin_ctzll(word); #else - /* shouldn't happen */ #error must have a working 64-bit integer datatype #endif } -#endif +#endif /* HAVE_BUILTIN_CTZ */ /* * pg_rightmost_one64_slow @@ -422,7 +412,7 @@ pg_rightmost_one64_slow(uint64 word) return result; } -#if defined(HAVE__GET_CPUID) && defined(HAVE__BUILTIN_CLZ) +#ifdef HAVE__BUILTIN_CLZ /* * This gets called on the first call. It replaces the function pointer * so that subsequent calls are routed directly to the chosen implementation. @@ -443,7 +433,7 @@ pg_leftmost_one32_abm(uint32 word) { return 31 - __builtin_clz(word); } -#endif +#endif /* HAVE__BUILTIN_CLZ */ /* * pg_leftmost_one32_slow @@ -463,7 +453,7 @@ pg_leftmost_one32_slow(uint32 word) return shift + leftmost_one_pos[(word >> shift) & 255]; } -#if defined(HAVE__GET_CPUID) && defined(HAVE__BUILTIN_CLZ) +#ifdef HAVE__BUILTIN_CLZ /* * This gets called on the first call. It replaces the function pointer * so that subsequent calls are routed directly to the chosen implementation. @@ -487,12 +477,10 @@ pg_leftmost_one64_abm(uint64 word) #elif defined(HAVE_LONG_LONG_INT_64) return 63 - __builtin_clzll(word); #else - /* shouldn't happen */ #error must have a working 64-bit integer datatype #endif - } -#endif +#endif /* HAVE__BUIILTIN_CLZ */ /* * pg_leftmost_one64_slow diff --git a/src/port/pg_popcount.c b/src/port/pg_popcount.c new file mode 100644 index 00000000000..5254c41273f --- /dev/null +++ b/src/port/pg_popcount.c @@ -0,0 +1,36 @@ +/*------------------------------------------------------------------------- + * + * pg_popcount.c + * CPU-optimized implementation of pg_popcount + * + * This file must be compiled with a compiler-specific flag to enable the + * POPCOUNT instruction. + * + * Portions Copyright (c) 2019, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/port/pg_popcount.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "port/pg_bitutils.h" + +int +pg_popcount32_sse42(uint32 word) +{ + return __builtin_popcount(word); +} + +int +pg_popcount64_sse42(uint64 word) +{ +#if defined(HAVE_LONG_INT_64) + return __builtin_popcountl(word); +#elif defined(HAVE_LONG_LONG_INT_64) + return __builtin_popcountll(word); +#else +#error must have a working 64-bit integer datatype +#endif +}