Folks,

Please find attached a couple of patches intended to $subject.

This patch set cut the time to copy ten million rows, each holding 10
randomly sized int8s, by about a third, so at least for that case, it's
pretty decent.

Thanks to Andrew Gierth for lots of patient help.

Best,
David.
-- 
David Fetter <david(at)fetter(dot)org> http://fetter.org/
Phone: +1 415 235 3778

Remember to vote!
Consider donating to Postgres: http://www.postgresql.org/about/donate
>From 6e8136ece5b01ca9cd16bdb974c4d54e939c92cf Mon Sep 17 00:00:00 2001
From: David Fetter <da...@fetter.org>
Date: Tue, 10 Sep 2019 02:06:31 -0700
Subject: [PATCH v1 1/2] Output digits two at a time in snprintf.c
To: hackers
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary="------------2.21.0"

This is a multi-part message in MIME format.
--------------2.21.0
Content-Type: text/plain; charset=UTF-8; format=fixed
Content-Transfer-Encoding: 8bit


diff --git a/src/port/snprintf.c b/src/port/snprintf.c
index 8fd997553e..fd9d384144 100644
--- a/src/port/snprintf.c
+++ b/src/port/snprintf.c
@@ -1014,9 +1014,60 @@ fmtint(long long value, char type, int forcesign, int leftjust,
 	   PrintfTarget *target)
 {
 	unsigned long long base;
+	unsigned long long square;
 	unsigned long long uvalue;
 	int			dosign;
-	const char *cvt = "0123456789abcdef";
+	/* Maps for octal, decimal, and two flavors of hexadecimal */
+	const char *digits;
+	const char	decimal_digits[200] =
+					/* 10^2 * 2 decimal digits */
+					"0001020304050607080910111213141516171819"
+					"2021222324252627282930313233343536373839"
+					"4041424344454647484950515253545556575859"
+					"6061626364656667686970717273747576777879"
+					"8081828384858687888990919293949596979899";
+	const char	octal_digits[128] =
+					/* 8^2 * 2 octal digits */
+					"00010203040506071011121314151617"
+					"20212223242526273031323334353637"
+					"40414243444546475051525354555657"
+					"60616263646566677071727374757677";
+					/* 16^2 * 2 hex digits */
+	const char	hex_lower_digits[512] =
+					"000102030405060708090a0b0c0d0e0f"
+					"101112131415161718191a1b1c1d1e1f"
+					"202122232425262728292a2b2c2d2e2f"
+					"303132333435363738393a3b3c3d3e3f"
+					"404142434445464748494a4b4c4d4e4f"
+					"505152535455565758595a5b5c5d5e5f"
+					"606162636465666768696a6b6c6d6e6f"
+					"707172737475767778797a7b7c7d7e7f"
+					"808182838485868788898a8b8c8d8e8f"
+					"909192939495969798999a9b9c9d9e9f"
+					"a0a1a2a3a4a5a6a7a8a9aaabacadaeaf"
+					"b0b1b2b3b4b5b6b7b8b9babbbcbdbebf"
+					"c0c1c2c3c4c5c6c7c8c9cacbcccdcecf"
+					"d0d1d2d3d4d5d6d7d8d9dadbdcdddedf"
+					"e0e1e2e3e4e5e6e7e8e9eaebecedeeef"
+					"f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff";
+	const char	hex_upper_digits[512] =
+					/* 16^2 * 2 HEX DIGITS */
+					"000102030405060708090A0B0C0D0E0F"
+					"101112131415161718191A1B1C1D1E1F"
+					"202122232425262728292A2B2C2D2E2F"
+					"303132333435363738393A3B3C3D3E3F"
+					"404142434445464748494A4B4C4D4E4F"
+					"505152535455565758595A5B5C5D5E5F"
+					"606162636465666768696A6B6C6D6E6F"
+					"707172737475767778797A7B7C7D7E7F"
+					"808182838485868788898A8B8C8D8E8F"
+					"909192939495969798999A9B9C9D9E9F"
+					"A0A1A2A3A4A5A6A7A8A9AAABACADAEAF"
+					"B0B1B2B3B4B5B6B7B8B9BABBBCBDBEBF"
+					"C0C1C2C3C4C5C6C7C8C9CACBCCCDCECF"
+					"D0D1D2D3D4D5D6D7D8D9DADBDCDDDEDF"
+					"E0E1E2E3E4E5E6E7E8E9EAEBECEDEEEF"
+					"F0F1F2F3F4F5F6F7F8F9FAFBFCFDFEFF";
 	int			signvalue = 0;
 	char		convert[64];
 	int			vallen = 0;
@@ -1027,23 +1078,27 @@ fmtint(long long value, char type, int forcesign, int leftjust,
 	{
 		case 'd':
 		case 'i':
+			digits = decimal_digits;
 			base = 10;
 			dosign = 1;
 			break;
 		case 'o':
+			digits = octal_digits;
 			base = 8;
 			dosign = 0;
 			break;
 		case 'u':
+			digits = decimal_digits;
 			base = 10;
 			dosign = 0;
 			break;
 		case 'x':
+			digits = hex_lower_digits;
 			base = 16;
 			dosign = 0;
 			break;
 		case 'X':
-			cvt = "0123456789ABCDEF";
+			digits = hex_upper_digits;
 			base = 16;
 			dosign = 0;
 			break;
@@ -1051,6 +1106,8 @@ fmtint(long long value, char type, int forcesign, int leftjust,
 			return;				/* keep compiler quiet */
 	}
 
+	square = base * base;
+
 	/* disable MSVC warning about applying unary minus to an unsigned value */
 #if _MSC_VER
 #pragma warning(push)
@@ -1073,12 +1130,20 @@ fmtint(long long value, char type, int forcesign, int leftjust,
 		vallen = 0;
 	else
 	{
-		/* make integer string */
-		do
+		/* make integer string, two digits at a time */
+		while(uvalue >= base)
 		{
-			convert[sizeof(convert) - (++vallen)] = cvt[uvalue % base];
-			uvalue = uvalue / base;
-		} while (uvalue);
+			const int i = (uvalue % square) * 2;
+			uvalue /= square;
+			vallen += 2;
+			memcpy(convert + sizeof(convert) - vallen, digits + i, 2);
+		}
+		/* Account for single digit */
+		if (uvalue > 0 || vallen == 0)
+		{
+			vallen++;
+			memcpy(convert + sizeof(convert) - vallen, digits + uvalue * 2 + 1, 1);
+		}
 	}
 
 	zeropad = Max(0, precision - vallen);

--------------2.21.0--


>From f4a3729900484292cce5066be6a6f183f489ae8c Mon Sep 17 00:00:00 2001
From: David Fetter <da...@fetter.org>
Date: Sun, 15 Sep 2019 00:06:29 -0700
Subject: [PATCH v1 2/2] Made int8 operations more efficient
To: hackers
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary="------------2.21.0"

This is a multi-part message in MIME format.
--------------2.21.0
Content-Type: text/plain; charset=UTF-8; format=fixed
Content-Transfer-Encoding: 8bit


- Output routines now do more digits per iteration, and
- Code determines the number of decimal digits in an int8 efficiently

diff --git a/src/backend/access/common/printsimple.c b/src/backend/access/common/printsimple.c
index 651ade14dd..17ca533b87 100644
--- a/src/backend/access/common/printsimple.c
+++ b/src/backend/access/common/printsimple.c
@@ -112,7 +112,7 @@ printsimple(TupleTableSlot *slot, DestReceiver *self)
 			case INT8OID:
 				{
 					int64		num = DatumGetInt64(value);
-					char		str[23];	/* sign, 21 digits and '\0' */
+					char		str[MAXINT8LEN];
 
 					pg_lltoa(num, str);
 					pq_sendcountedtext(&buf, str, strlen(str), false);
diff --git a/src/backend/utils/adt/Makefile b/src/backend/utils/adt/Makefile
index 580043233b..3818dbaa85 100644
--- a/src/backend/utils/adt/Makefile
+++ b/src/backend/utils/adt/Makefile
@@ -39,6 +39,8 @@ jsonpath_scan.c: FLEX_NO_BACKUP=yes
 # jsonpath_scan is compiled as part of jsonpath_gram
 jsonpath_gram.o: jsonpath_scan.c
 
+numutils.o: CFLAGS += $(PERMIT_DECLARATION_AFTER_STATEMENT)
+
 # jsonpath_gram.c and jsonpath_scan.c are in the distribution tarball,
 # so they are not cleaned here.
 clean distclean maintainer-clean:
diff --git a/src/backend/utils/adt/int8.c b/src/backend/utils/adt/int8.c
index 0ff9394a2f..6230807906 100644
--- a/src/backend/utils/adt/int8.c
+++ b/src/backend/utils/adt/int8.c
@@ -27,8 +27,6 @@
 #include "utils/builtins.h"
 
 
-#define MAXINT8LEN		25
-
 typedef struct
 {
 	int64		current;
diff --git a/src/backend/utils/adt/numutils.c b/src/backend/utils/adt/numutils.c
index 70138feb29..f75faa9255 100644
--- a/src/backend/utils/adt/numutils.c
+++ b/src/backend/utils/adt/numutils.c
@@ -20,6 +20,44 @@
 
 #include "common/int.h"
 #include "utils/builtins.h"
+#include "port/pg_bitutils.h"
+
+/*
+ * A table of all two-digit numbers. This is used to speed up decimal digit
+ * generation by copying pairs of digits into the final output.
+ */
+static const char DIGIT_TABLE[200] = {
+	'0', '0', '0', '1', '0', '2', '0', '3', '0', '4', '0', '5', '0', '6', '0', '7', '0', '8', '0', '9',
+	'1', '0', '1', '1', '1', '2', '1', '3', '1', '4', '1', '5', '1', '6', '1', '7', '1', '8', '1', '9',
+	'2', '0', '2', '1', '2', '2', '2', '3', '2', '4', '2', '5', '2', '6', '2', '7', '2', '8', '2', '9',
+	'3', '0', '3', '1', '3', '2', '3', '3', '3', '4', '3', '5', '3', '6', '3', '7', '3', '8', '3', '9',
+	'4', '0', '4', '1', '4', '2', '4', '3', '4', '4', '4', '5', '4', '6', '4', '7', '4', '8', '4', '9',
+	'5', '0', '5', '1', '5', '2', '5', '3', '5', '4', '5', '5', '5', '6', '5', '7', '5', '8', '5', '9',
+	'6', '0', '6', '1', '6', '2', '6', '3', '6', '4', '6', '5', '6', '6', '6', '7', '6', '8', '6', '9',
+	'7', '0', '7', '1', '7', '2', '7', '3', '7', '4', '7', '5', '7', '6', '7', '7', '7', '8', '7', '9',
+	'8', '0', '8', '1', '8', '2', '8', '3', '8', '4', '8', '5', '8', '6', '8', '7', '8', '8', '8', '9',
+	'9', '0', '9', '1', '9', '2', '9', '3', '9', '4', '9', '5', '9', '6', '9', '7', '9', '8', '9', '9'
+};
+
+/*
+ * Adapted from http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
+ */
+static inline uint32
+decimalLength(const uint64 v)
+{
+	uint32			t;
+	static uint64	PowersOfTen[] =
+	{1,                10,                100,
+	 1000,             10000,             100000,
+	 1000000,          10000000,          100000000,
+	 1000000000,       10000000000,       100000000000,
+	 1000000000000,    10000000000000,    100000000000000,
+	 1000000000000000, 10000000000000000, 100000000000000000,
+	 1000000000000000000};
+
+	t = (pg_leftmost_one_pos64(v) + 1)*1233/4096;
+	return t + (v >= PowersOfTen[t]);
+}
 
 /*
  * pg_atoi: convert string to integer
@@ -333,13 +371,13 @@ pg_ltoa(int32 value, char *a)
  * pg_lltoa: convert a signed 64-bit integer to its string representation
  *
  * Caller must ensure that 'a' points to enough memory to hold the result
- * (at least MAXINT8LEN+1 bytes, counting a leading sign and trailing NUL).
+ * (at least MAXINT8LEN bytes, counting a leading sign and trailing NUL).
  */
 void
 pg_lltoa(int64 value, char *a)
 {
-	char	   *start = a;
-	bool		neg = false;
+	uint32	olength;
+	uint32	i = 0;
 
 	/*
 	 * Avoid problems with the most negative integer not being representable
@@ -350,37 +388,83 @@ pg_lltoa(int64 value, char *a)
 		memcpy(a, "-9223372036854775808", 21);
 		return;
 	}
-	else if (value < 0)
+
+	/* Might as well handle this case, too */
+	if (value == 0)
+	{
+		memcpy(a, "0", 2);
+		return;
+	}
+
+	if (value < 0)
 	{
 		value = -value;
-		neg = true;
-	}
-
-	/* Compute the result string backwards. */
-	do
-	{
-		int64		remainder;
-		int64		oldval = value;
-
-		value /= 10;
-		remainder = oldval - value * 10;
-		*a++ = '0' + remainder;
-	} while (value != 0);
-
-	if (neg)
 		*a++ = '-';
+	}
+
+	olength = decimalLength(value);
+
+	/* Compute the result string. */
+	while (value >= 100000000)
+	{
+		/* Expensive 64-bit division. Optimize? */
+		const	uint64 q = value / 100000000;
+		uint32	value2 = (uint32) (value - 100000000 * q);
+
+		value = q;
+
+		const uint32 c = value2 % 10000;
+		const uint32 d = value2 / 10000;
+		const uint32 c0 = (c % 100) << 1;
+		const uint32 c1 = (c / 100) << 1;
+		const uint32 d0 = (d % 100) << 1;
+		const uint32 d1 = (d / 100) << 1;
+
+		value = q;
+
+		memcpy(a + olength - i - 2, DIGIT_TABLE + c0, 2);
+		memcpy(a + olength - i - 4, DIGIT_TABLE + c1, 2);
+		memcpy(a + olength - i - 6, DIGIT_TABLE + d0, 2);
+		memcpy(a + olength - i - 8, DIGIT_TABLE + d1, 2);
+		i += 8;
+	}
+
+	uint32		value2 = (uint32) value;
 
-	/* Add trailing NUL byte, and back up 'a' to the last character. */
-	*a-- = '\0';
+	while (value2 >= 10000)
+	{
+		const	uint32 c = value2 - 10000 * (value2 / 10000);
+
+		value2 /= 10000;
+
+		const	uint32 c0 = (c % 100) << 1;
+		const	uint32 c1 = (c / 100) << 1;
+
+		memcpy(a + olength - i - 2, DIGIT_TABLE + c0, 2);
+		memcpy(a + olength - i - 4, DIGIT_TABLE + c1, 2);
+		i += 4;
+	}
+	if (value2 >= 100)
+	{
+		const uint32 c = (value2 % 100) << 1;
 
-	/* Reverse string. */
-	while (start < a)
+		value2 /= 100;
+		memcpy(a + olength - i - 2, DIGIT_TABLE + c, 2);
+		i += 2;
+	}
+	if (value2 >= 10)
 	{
-		char		swap = *start;
+		const uint32 c = value2 << 1;
 
-		*start++ = *a;
-		*a-- = swap;
+		memcpy(a + olength - i - 2, DIGIT_TABLE + c, 2);
+		i += 2;
 	}
+	else
+	{
+		*a = (char) ('0' + value2);
+	}
+
+	a[olength] = '\0';
 }
 
 
diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h
index 937ddb7ef0..9e8392741e 100644
--- a/src/include/utils/builtins.h
+++ b/src/include/utils/builtins.h
@@ -18,6 +18,7 @@
 #include "nodes/nodes.h"
 #include "utils/fmgrprotos.h"
 
+#define MAXINT8LEN 21
 
 /* bool.c */
 extern bool parse_bool(const char *value, bool *result);

--------------2.21.0--


Reply via email to