On Wed, Sep 18, 2019 at 05:42:01AM +0200, David Fetter wrote: > On Tue, Sep 17, 2019 at 09:01:57AM +0200, David Fetter wrote: > > On Tue, Sep 17, 2019 at 08:55:05AM +0200, David Fetter wrote: > > > On Sun, Sep 15, 2019 at 09:18:49AM +0200, David Fetter wrote: > > > > Folks, > > > > > > > > Please find attached a couple of patches intended to $subject. > > > > > > > > This patch set cut the time to copy ten million rows of randomly sized > > > > int8s (10 of them) by about a third, so at least for that case, it's > > > > pretty decent. > > > > > > Added int4 output, removed the sprintf stuff, as it didn't seem to > > > help in any cases I was testing. > > > > Found a couple of "whiles" that should have been "ifs." > > Factored out some inefficient functions and made the guts use the more > efficient function.
Fixed a copy-paste-o that introduced some unneeded 64-bit math. Best, David. -- David Fetter <david(at)fetter(dot)org> http://fetter.org/ Phone: +1 415 235 3778 Remember to vote! Consider donating to Postgres: http://www.postgresql.org/about/donate
>From b9b2e2dac6f5c6a15cf4161ff135d201ea52a207 Mon Sep 17 00:00:00 2001 From: David Fetter <da...@fetter.org> Date: Sun, 15 Sep 2019 00:06:29 -0700 Subject: [PATCH v5] Make int4 and int8 operations more efficient To: hackers MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="------------2.21.0" This is a multi-part message in MIME format. --------------2.21.0 Content-Type: text/plain; charset=UTF-8; format=fixed Content-Transfer-Encoding: 8bit - Output routines now do more digits per iteration, and - Code determines the number of decimal digits in int4/int8 efficiently - Split off pg_ltoa_n from pg_ltoa - Use same to make other functions shorter diff --git a/src/backend/access/common/printsimple.c b/src/backend/access/common/printsimple.c index 651ade14dd..17ca533b87 100644 --- a/src/backend/access/common/printsimple.c +++ b/src/backend/access/common/printsimple.c @@ -112,7 +112,7 @@ printsimple(TupleTableSlot *slot, DestReceiver *self) case INT8OID: { int64 num = DatumGetInt64(value); - char str[23]; /* sign, 21 digits and '\0' */ + char str[MAXINT8LEN]; pg_lltoa(num, str); pq_sendcountedtext(&buf, str, strlen(str), false); diff --git a/src/backend/utils/adt/Makefile b/src/backend/utils/adt/Makefile index 580043233b..3818dbaa85 100644 --- a/src/backend/utils/adt/Makefile +++ b/src/backend/utils/adt/Makefile @@ -39,6 +39,8 @@ jsonpath_scan.c: FLEX_NO_BACKUP=yes # jsonpath_scan is compiled as part of jsonpath_gram jsonpath_gram.o: jsonpath_scan.c +numutils.o: CFLAGS += $(PERMIT_DECLARATION_AFTER_STATEMENT) + # jsonpath_gram.c and jsonpath_scan.c are in the distribution tarball, # so they are not cleaned here. 
clean distclean maintainer-clean: diff --git a/src/backend/utils/adt/int8.c b/src/backend/utils/adt/int8.c index 0ff9394a2f..6230807906 100644 --- a/src/backend/utils/adt/int8.c +++ b/src/backend/utils/adt/int8.c @@ -27,8 +27,6 @@ #include "utils/builtins.h" -#define MAXINT8LEN 25 - typedef struct { int64 current; diff --git a/src/backend/utils/adt/numutils.c b/src/backend/utils/adt/numutils.c index 70138feb29..8ef9fac717 100644 --- a/src/backend/utils/adt/numutils.c +++ b/src/backend/utils/adt/numutils.c @@ -20,6 +20,58 @@ #include "common/int.h" #include "utils/builtins.h" +#include "port/pg_bitutils.h" + +/* + * A table of all two-digit numbers. This is used to speed up decimal digit + * generation by copying pairs of digits into the final output. + */ +static const char DIGIT_TABLE[200] = { + '0', '0', '0', '1', '0', '2', '0', '3', '0', '4', '0', '5', '0', '6', '0', '7', '0', '8', '0', '9', + '1', '0', '1', '1', '1', '2', '1', '3', '1', '4', '1', '5', '1', '6', '1', '7', '1', '8', '1', '9', + '2', '0', '2', '1', '2', '2', '2', '3', '2', '4', '2', '5', '2', '6', '2', '7', '2', '8', '2', '9', + '3', '0', '3', '1', '3', '2', '3', '3', '3', '4', '3', '5', '3', '6', '3', '7', '3', '8', '3', '9', + '4', '0', '4', '1', '4', '2', '4', '3', '4', '4', '4', '5', '4', '6', '4', '7', '4', '8', '4', '9', + '5', '0', '5', '1', '5', '2', '5', '3', '5', '4', '5', '5', '5', '6', '5', '7', '5', '8', '5', '9', + '6', '0', '6', '1', '6', '2', '6', '3', '6', '4', '6', '5', '6', '6', '6', '7', '6', '8', '6', '9', + '7', '0', '7', '1', '7', '2', '7', '3', '7', '4', '7', '5', '7', '6', '7', '7', '7', '8', '7', '9', + '8', '0', '8', '1', '8', '2', '8', '3', '8', '4', '8', '5', '8', '6', '8', '7', '8', '8', '8', '9', + '9', '0', '9', '1', '9', '2', '9', '3', '9', '4', '9', '5', '9', '6', '9', '7', '9', '8', '9', '9' +}; + +/* + * Adapted from http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10 + */ +static inline uint32 +decimalLength32(const uint32 v) +{ + uint32 t; + 
static uint64 PowersOfTen[] = + {1, 10, 100, + 1000, 10000, 100000, + 1000000, 10000000, 100000000, + 1000000000}; + + t = (pg_leftmost_one_pos32(v) + 1)*1233/4096; + return t + (v >= PowersOfTen[t]); +} + +static inline uint32 +decimalLength64(const uint64 v) +{ + uint32 t; + static uint64 PowersOfTen[] = + {1, 10, 100, + 1000, 10000, 100000, + 1000000, 10000000, 100000000, + 1000000000, 10000000000, 100000000000, + 1000000000000, 10000000000000, 100000000000000, + 1000000000000000, 10000000000000000, 100000000000000000, + 1000000000000000000}; + + t = (pg_leftmost_one_pos64(v) + 1)*1233/4096; + return t + (v >= PowersOfTen[t]); +} /* * pg_atoi: convert string to integer @@ -276,16 +328,17 @@ pg_itoa(int16 i, char *a) } /* - * pg_ltoa: converts a signed 32-bit integer to its string representation + * pg_ltoa_n: converts a signed 32-bit integer to its string representation, not + * NUL-terminated, and returns the length of that string representation * - * Caller must ensure that 'a' points to enough memory to hold the result - * (at least 12 bytes, counting a leading sign and trailing NUL). + * Caller must ensure that 'a' points to enough memory to hold the result (at + * least 11 bytes, counting a leading sign). */ -void -pg_ltoa(int32 value, char *a) +static int32 +pg_ltoa_n(int32 value, char *a) { - char *start = a; - bool neg = false; + uint32 olength; + uint32 i = 0, adjust = 0; /* * Avoid problems with the most negative integer not being representable @@ -293,53 +346,111 @@ pg_ltoa(int32 value, char *a) */ if (value == PG_INT32_MIN) { - memcpy(a, "-2147483648", 12); - return; + memcpy(a, "-2147483648", 11); + return 11; } - else if (value < 0) + + /* Might as well handle this case, too */ + if (value == 0) + { + memcpy(a, "0", 1); + return 1; + } + + if (value < 0) { value = -value; - neg = true; - } - - /* Compute the result string backwards. 
*/ - do - { - int32 remainder; - int32 oldval = value; - - value /= 10; - remainder = oldval - value * 10; - *a++ = '0' + remainder; - } while (value != 0); - - if (neg) *a++ = '-'; + adjust++; + } + + olength = decimalLength32(value); + + /* Compute the result string. */ + while (value >= 100000000) + { + /* Expensive 64-bit division. Optimize? */ + const uint32 value2 = value % 100000000; + + value /= 100000000; - /* Add trailing NUL byte, and back up 'a' to the last character. */ - *a-- = '\0'; + const uint32 c = value2 % 10000; + const uint32 d = value2 / 10000; + const uint32 c0 = (c % 100) << 1; + const uint32 c1 = (c / 100) << 1; + const uint32 d0 = (d % 100) << 1; + const uint32 d1 = (d / 100) << 1; - /* Reverse string. */ - while (start < a) + memcpy(a + olength - i - 2, DIGIT_TABLE + c0, 2); + memcpy(a + olength - i - 4, DIGIT_TABLE + c1, 2); + memcpy(a + olength - i - 6, DIGIT_TABLE + d0, 2); + memcpy(a + olength - i - 8, DIGIT_TABLE + d1, 2); + i += 8; + } + + if (value >= 10000) { - char swap = *start; + const uint32 c = value - 10000 * (value / 10000); + + value /= 10000; - *start++ = *a; - *a-- = swap; + const uint32 c0 = (c % 100) << 1; + const uint32 c1 = (c / 100) << 1; + + memcpy(a + olength - i - 2, DIGIT_TABLE + c0, 2); + memcpy(a + olength - i - 4, DIGIT_TABLE + c1, 2); + i += 4; } + if (value >= 100) + { + const uint32 c = (value % 100) << 1; + + value /= 100; + memcpy(a + olength - i - 2, DIGIT_TABLE + c, 2); + i += 2; + } + if (value >= 10) + { + const uint32 c = value << 1; + + memcpy(a + olength - i - 2, DIGIT_TABLE + c, 2); + i += 2; + } + else + { + *a = (char) ('0' + value); + i++; + } + + i += adjust; + return i; +} + +/* + * NUL-terminate the output of pg_ltoa_n. + * + * It is the caller's responsibility to ensure that a is at least 12 bytes long, + * which is enough room to hold a minus sign, a maximally long int32, and the + * above terminating NUL. 
+ */ +void +pg_ltoa(int32 value, char *a) +{ + int32 len = pg_ltoa_n(value, a); + a[len] = '\0'; } /* * pg_lltoa: convert a signed 64-bit integer to its string representation * * Caller must ensure that 'a' points to enough memory to hold the result - * (at least MAXINT8LEN+1 bytes, counting a leading sign and trailing NUL). + * (at least MAXINT8LEN bytes, counting a leading sign and trailing NUL). */ void pg_lltoa(int64 value, char *a) { - char *start = a; - bool neg = false; + uint32 olength; + uint32 i = 0; /* * Avoid problems with the most negative integer not being representable @@ -350,37 +461,82 @@ pg_lltoa(int64 value, char *a) memcpy(a, "-9223372036854775808", 21); return; } - else if (value < 0) + + /* Might as well handle this case, too */ + if (value == 0) + { + memcpy(a, "0", 2); + return; + } + + if (value < 0) { value = -value; - neg = true; - } - - /* Compute the result string backwards. */ - do - { - int64 remainder; - int64 oldval = value; - - value /= 10; - remainder = oldval - value * 10; - *a++ = '0' + remainder; - } while (value != 0); - - if (neg) *a++ = '-'; + } + + olength = decimalLength64(value); + + /* Compute the result string. */ + while (value >= 100000000) + { + /* Expensive 64-bit division. Optimize? */ + const uint64 q = value / 100000000; + uint32 value2 = (uint32) (value - 100000000 * q); + + value = q; - /* Add trailing NUL byte, and back up 'a' to the last character. */ - *a-- = '\0'; + const uint32 c = value2 % 10000; + const uint32 d = value2 / 10000; + const uint32 c0 = (c % 100) << 1; + const uint32 c1 = (c / 100) << 1; + const uint32 d0 = (d % 100) << 1; + const uint32 d1 = (d / 100) << 1; - /* Reverse string. 
*/ - while (start < a) + memcpy(a + olength - i - 2, DIGIT_TABLE + c0, 2); + memcpy(a + olength - i - 4, DIGIT_TABLE + c1, 2); + memcpy(a + olength - i - 6, DIGIT_TABLE + d0, 2); + memcpy(a + olength - i - 8, DIGIT_TABLE + d1, 2); + i += 8; + } + + /* Switch to 32-bit for speed */ + uint32 value2 = (uint32) value; + + if (value2 >= 10000) { - char swap = *start; + const uint32 c = value2 - 10000 * (value2 / 10000); + + value2 /= 10000; + + const uint32 c0 = (c % 100) << 1; + const uint32 c1 = (c / 100) << 1; - *start++ = *a; - *a-- = swap; + memcpy(a + olength - i - 2, DIGIT_TABLE + c0, 2); + memcpy(a + olength - i - 4, DIGIT_TABLE + c1, 2); + i += 4; } + if (value2 >= 100) + { + const uint32 c = (value2 % 100) << 1; + + value2 /= 100; + memcpy(a + olength - i - 2, DIGIT_TABLE + c, 2); + i += 2; + } + if (value2 >= 10) + { + const uint32 c = value2 << 1; + + memcpy(a + olength - i - 2, DIGIT_TABLE + c, 2); + i += 2; + } + else + { + *a = (char) ('0' + value2); + } + + a[olength] = '\0'; } @@ -409,60 +565,44 @@ pg_lltoa(int64 value, char *a) char * pg_ltostr_zeropad(char *str, int32 value, int32 minwidth) { - char *start = str; - char *end = &str[minwidth]; - int32 num = value; + int32 len; Assert(minwidth > 0); - /* - * Handle negative numbers in a special way. We can't just write a '-' - * prefix and reverse the sign as that would overflow for INT32_MIN. - */ - if (num < 0) + if (value >= 0) { - *start++ = '-'; - minwidth--; + if (value < 100 && minwidth == 2) /* Short cut for common case */ + { + const uint32 c = value << 1; + memcpy(str, DIGIT_TABLE + c, 2); + return str + 2; + } + len = pg_ltoa_n(value, str); + if (minwidth <= len) + return str + len; + + memmove(str + minwidth - len, str, len); + for(int i = 0; i < minwidth - len; i++) + { + memcpy(str + i, DIGIT_TABLE, 1); + } + return str + minwidth; + } + else + { /* - * Build the number starting at the last digit. 
Here remainder will - * be a negative number, so we must reverse the sign before adding '0' - * in order to get the correct ASCII digit. + * Changing this number's sign would overflow PG_INT32_MAX, + * so special-case it. */ - while (minwidth--) + if (value == PG_INT32_MIN) { - int32 oldval = num; - int32 remainder; - - num /= 10; - remainder = oldval - num * 10; - start[minwidth] = '0' - remainder; + memcpy(str, "-2147483648", 11); + return str + 11; } + *str++ = '-'; + return pg_ltostr_zeropad(str, -value, minwidth - 1); } - else - { - /* Build the number starting at the last digit */ - while (minwidth--) - { - int32 oldval = num; - int32 remainder; - - num /= 10; - remainder = oldval - num * 10; - start[minwidth] = '0' + remainder; - } - } - - /* - * If minwidth was not high enough to fit the number then num won't have - * been divided down to zero. We punt the problem to pg_ltostr(), which - * will generate a correct answer in the minimum valid width. - */ - if (num != 0) - return pg_ltostr(str, value); - - /* Otherwise, return last output character + 1 */ - return end; } /* @@ -486,62 +626,8 @@ pg_ltostr_zeropad(char *str, int32 value, int32 minwidth) char * pg_ltostr(char *str, int32 value) { - char *start; - char *end; - - /* - * Handle negative numbers in a special way. We can't just write a '-' - * prefix and reverse the sign as that would overflow for INT32_MIN. - */ - if (value < 0) - { - *str++ = '-'; - - /* Mark the position we must reverse the string from. */ - start = str; - - /* Compute the result string backwards. */ - do - { - int32 oldval = value; - int32 remainder; - - value /= 10; - remainder = oldval - value * 10; - /* As above, we expect remainder to be negative. */ - *str++ = '0' - remainder; - } while (value != 0); - } - else - { - /* Mark the position we must reverse the string from. */ - start = str; - - /* Compute the result string backwards. 
*/ - do - { - int32 oldval = value; - int32 remainder; - - value /= 10; - remainder = oldval - value * 10; - *str++ = '0' + remainder; - } while (value != 0); - } - - /* Remember the end+1 and back up 'str' to the last character. */ - end = str--; - - /* Reverse string. */ - while (start < str) - { - char swap = *start; - - *start++ = *str; - *str-- = swap; - } - - return end; + int32 len = pg_ltoa_n(value, str); + return str + len; } /* diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h index 937ddb7ef0..628fe73573 100644 --- a/src/include/utils/builtins.h +++ b/src/include/utils/builtins.h @@ -18,6 +18,7 @@ #include "nodes/nodes.h" #include "utils/fmgrprotos.h" +#define MAXINT8LEN 21 /* bool.c */ extern bool parse_bool(const char *value, bool *result); @@ -46,6 +47,7 @@ extern int32 pg_atoi(const char *s, int size, int c); extern int16 pg_strtoint16(const char *s); extern int32 pg_strtoint32(const char *s); extern void pg_itoa(int16 i, char *a); +static int32 pg_ltoa_n(int32 l, char *a); extern void pg_ltoa(int32 l, char *a); extern void pg_lltoa(int64 ll, char *a); extern char *pg_ltostr_zeropad(char *str, int32 value, int32 minwidth); --------------2.21.0--