Hi, On 2017-09-13 23:34:18 -0700, Andres Freund wrote: > I'm not yet super sure about the implementation. For one, I'm not > sure this shouldn't instead be stringinfo.h functions, with very very > tiny pqformat.h wrappers. But conversely I think it'd make a lot of > sense for the pqformat integer functions to get rid of the > continually maintained trailing null-byte - I was hoping the compiler > could optimize that away, but alas, no luck. As soon as a single > integer is sent, you can't rely on 0 terminated strings anyway.
I'd been wondering about missing CPU optimizations after the patch, and hunted it down. Turns out the problem is that htons/ntohs are, on pretty much all glibc versions, implemented using inline assembler. Which in turns allows the compiler very little freedom to perform optimizations, because it doesn't know what's actually happening. Attached is an extension of the already existing pg_bswap.h that a) adds 16 bit support b) moves everything to inline functions, removing multiple evaluation hazards that were present everywhere. c) adds pg_nto{s,l,ll} and pg_hton{s,l,ll} wrappers that only do work if necessary. This'll allow the later patches to allow the compiler to perform the relevant optimizations. It also allows to optimize e.g. pq_sendint64() to avoid having to do multiple byteswaps. Greetings, Andres Freund
>From 2bf6c7508ca013b9c45c6bee0168ce01ff1ea8bc Mon Sep 17 00:00:00 2001 From: Andres Freund <and...@anarazel.de> Date: Wed, 20 Sep 2017 14:04:06 -0700 Subject: [PATCH] Extend & revamp pg_bswap.h infrastructure. Upcoming patches are going to address performance issues that involve slow system provided ntohs/htons etc. To address that expand pg_bswap.h to provide pg_ntohs/l/ll, pg_htons/l/ll and optimize their respective implementations by falling back to compiler intrinsics for gcc compatible compilers and msvc. Additionally remove multiple evaluation hazards from the existing BSWAP32/64 macros, by replacing them with inline functions when necessary. In the course of that the naming scheme is changed to pg_bswap16/32/64. Author: Andres Freund --- config/c-compiler.m4 | 17 ++++++ contrib/btree_gist/btree_uuid.c | 4 +- src/include/port/pg_bswap.h | 132 ++++++++++++++++++++++++++++++++-------- src/include/port/pg_crc32c.h | 2 +- 4 files changed, 128 insertions(+), 27 deletions(-) diff --git a/config/c-compiler.m4 b/config/c-compiler.m4 index 7275ea69fe..3a4498fec4 100644 --- a/config/c-compiler.m4 +++ b/config/c-compiler.m4 @@ -224,6 +224,23 @@ AC_DEFINE(HAVE__BUILTIN_TYPES_COMPATIBLE_P, 1, fi])# PGAC_C_TYPES_COMPATIBLE +# PGAC_C_BUILTIN_BSWAP16 +# ------------------------- +# Check if the C compiler understands __builtin_bswap16(), +# and define HAVE__BUILTIN_BSWAP16 if so. +AC_DEFUN([PGAC_C_BUILTIN_BSWAP16], +[AC_CACHE_CHECK(for __builtin_bswap16, pgac_cv__builtin_bswap16, +[AC_COMPILE_IFELSE([AC_LANG_SOURCE( +[static unsigned long int x = __builtin_bswap16(0xaabb);] +)], +[pgac_cv__builtin_bswap16=yes], +[pgac_cv__builtin_bswap16=no])]) +if test x"$pgac_cv__builtin_bswap16" = xyes ; then +AC_DEFINE(HAVE__BUILTIN_BSWAP16, 1, + [Define to 1 if your compiler understands __builtin_bswap16.]) +fi])# PGAC_C_BUILTIN_BSWAP16 + + # PGAC_C_BUILTIN_BSWAP32 # ------------------------- diff --git a/contrib/btree_gist/btree_uuid.c b/contrib/btree_gist/btree_uuid.c index ecf357d662..9ff421ea55 100644 --- a/contrib/btree_gist/btree_uuid.c +++ b/contrib/btree_gist/btree_uuid.c @@ -182,8 +182,8 @@ uuid_2_double(const pg_uuid_t *u) * machine, byte-swap each half so we can use native uint64 arithmetic. */ #ifndef WORDS_BIGENDIAN - uu[0] = BSWAP64(uu[0]); - uu[1] = BSWAP64(uu[1]); + uu[0] = pg_bswap64(uu[0]); + uu[1] = pg_bswap64(uu[1]); #endif /* diff --git a/src/include/port/pg_bswap.h b/src/include/port/pg_bswap.h index 50a6bd106b..3d10aa247b 100644 --- a/src/include/port/pg_bswap.h +++ b/src/include/port/pg_bswap.h @@ -3,15 +3,13 @@ * pg_bswap.h * Byte swapping. * - * Macros for reversing the byte order of 32-bit and 64-bit unsigned integers. + * Macros for reversing the byte order of 16, 32 and 64-bit unsigned integers. * For example, 0xAABBCCDD becomes 0xDDCCBBAA. These are just wrappers for * built-in functions provided by the compiler where support exists. - * Elsewhere, beware of multiple evaluations of the arguments! * - * Note that the GCC built-in functions __builtin_bswap32() and - * __builtin_bswap64() are documented as accepting single arguments of type - * uint32_t and uint64_t respectively (these are also the respective return - * types). Use caution when using these wrapper macros with signed integers. + * Note that all of these functions accept unsigned integers as arguments and + * return the same. Use caution when using these wrapper macros with signed + * integers. * * Copyright (c) 2015-2017, PostgreSQL Global Development Group * @@ -22,28 +20,114 @@ #ifndef PG_BSWAP_H #define PG_BSWAP_H -#ifdef HAVE__BUILTIN_BSWAP32 -#define BSWAP32(x) __builtin_bswap32(x) + +/* In all supported versions msvc provides _byteswap_* functions in stdlib.h */ +#ifdef _MSC_VER +#include <stdlib.h> +#endif + + +/* implementation of uint16 pg_bswap16(uint16) */ +#if defined(HAVE__BUILTIN_BSWAP16) + +#define pg_bswap16(x) __builtin_bswap16(x) + +#elif defined(_MSC_VER) + +#define pg_bswap16(x) _byteswap_ushort(x) + #else -#define BSWAP32(x) ((((x) << 24) & 0xff000000) | \ - (((x) << 8) & 0x00ff0000) | \ - (((x) >> 8) & 0x0000ff00) | \ - (((x) >> 24) & 0x000000ff)) + +static inline uint16 +pg_bswap16(uint16 x) +{ + return + ((x << 8) & 0xff00) | + ((x >> 8) & 0x00ff); +} + +#endif /* HAVE__BUILTIN_BSWAP16 */ + + +/* implementation of uint32 pg_bswap32(uint32) */ +#if defined(HAVE__BUILTIN_BSWAP32) + +#define pg_bswap32(x) __builtin_bswap32(x) + +#elif defined(_MSC_VER) + +#define pg_bswap32(x) _byteswap_ulong(x) + +#else + +static inline uint32 +pg_bswap32(uint32 x) +{ + return + ((x << 24) & 0xff000000) | + ((x << 8) & 0x00ff0000) | + ((x >> 8) & 0x0000ff00) | + ((x >> 24) & 0x000000ff); +} + #endif /* HAVE__BUILTIN_BSWAP32 */ -#ifdef HAVE__BUILTIN_BSWAP64 -#define BSWAP64(x) __builtin_bswap64(x) + +/* implementation of uint64 pg_bswap64(uint64) */ +#if defined(HAVE__BUILTIN_BSWAP64) + +#define pg_bswap64(x) __builtin_bswap64(x) + + +#elif defined(_MSC_VER) + +#define pg_bswap64(x) _byteswap_uint64(x) + #else -#define BSWAP64(x) ((((x) << 56) & UINT64CONST(0xff00000000000000)) | \ - (((x) << 40) & UINT64CONST(0x00ff000000000000)) | \ - (((x) << 24) & UINT64CONST(0x0000ff0000000000)) | \ - (((x) << 8) & UINT64CONST(0x000000ff00000000)) | \ - (((x) >> 8) & UINT64CONST(0x00000000ff000000)) | \ - (((x) >> 24) & UINT64CONST(0x0000000000ff0000)) | \ - (((x) >> 40) & UINT64CONST(0x000000000000ff00)) | \ - (((x) >> 56) & UINT64CONST(0x00000000000000ff))) + +static inline uint16 +pg_bswap64(uint16 x) +{ + return + ((x << 56) & UINT64CONST(0xff00000000000000)) | + ((x << 40) & UINT64CONST(0x00ff000000000000)) | + ((x << 24) & UINT64CONST(0x0000ff0000000000)) | + ((x << 8) & UINT64CONST(0x000000ff00000000)) | + ((x >> 8) & UINT64CONST(0x00000000ff000000)) | + ((x >> 24) & UINT64CONST(0x0000000000ff0000)) | + ((x >> 40) & UINT64CONST(0x000000000000ff00)) | + ((x >> 56) & UINT64CONST(0x00000000000000ff)); +} #endif /* HAVE__BUILTIN_BSWAP64 */ + +/* + * Portable and fast equivalents for for ntohs, ntohl, htons, htonl, + * additionally extended to 64 bits. + */ +#ifdef WORDS_BIGENDIAN + +#define pg_htons(x) (x) +#define pg_htonl(x) (x) +#define pg_htonll(x) (x) + +#define pg_ntohs(x) (x) +#define pg_ntohl(x) (x) +#define pg_ntohll(x) (x) + +#else + +#define pg_htons(x) pg_bswap16(x) +#define pg_htonl(x) pg_bswap32(x) +#define pg_htonll(x) pg_bswap64(x) + +#define pg_ntohs(x) pg_bswap16(x) +#define pg_ntohl(x) pg_bswap32(x) +#define pg_ntohll(x) pg_bswap64(x) + +#endif /* WORDS_BIGENDIAN */ + + /* * Rearrange the bytes of a Datum from big-endian order into the native byte * order. On big-endian machines, this does nothing at all. Note that the C @@ -60,9 +144,9 @@ #define DatumBigEndianToNative(x) (x) #else /* !WORDS_BIGENDIAN */ #if SIZEOF_DATUM == 8 -#define DatumBigEndianToNative(x) BSWAP64(x) +#define DatumBigEndianToNative(x) pg_bswap64(x) #else /* SIZEOF_DATUM != 8 */ -#define DatumBigEndianToNative(x) BSWAP32(x) +#define DatumBigEndianToNative(x) pg_bswap32(x) #endif /* SIZEOF_DATUM == 8 */ #endif /* WORDS_BIGENDIAN */ diff --git a/src/include/port/pg_crc32c.h b/src/include/port/pg_crc32c.h index cd58ecc988..32d7176273 100644 --- a/src/include/port/pg_crc32c.h +++ b/src/include/port/pg_crc32c.h @@ -73,7 +73,7 @@ extern pg_crc32c (*pg_comp_crc32c) (pg_crc32c crc, const void *data, size_t len) #define COMP_CRC32C(crc, data, len) \ ((crc) = pg_comp_crc32c_sb8((crc), (data), (len))) #ifdef WORDS_BIGENDIAN -#define FIN_CRC32C(crc) ((crc) = BSWAP32(crc) ^ 0xFFFFFFFF) +#define FIN_CRC32C(crc) ((crc) = pg_bswap32(crc) ^ 0xFFFFFFFF) #else #define FIN_CRC32C(crc) ((crc) ^= 0xFFFFFFFF) #endif -- 2.14.1.536.g6867272d5b.dirty
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers