Hi, While looking at binary COPY performance I forgot to add BINARY and was a bit shocked to see printf that high in the profile...
Setup: CREATE TABLE convtest AS SELECT a.i ai, b.i bi, a.i*b.i aibi, (a.i*b.i)::text aibit FROM generate_series(1,1000) a(i), generate_series(1, 10000) b(i); Profile with an unmodified pg: speedtest=# COPY convtest(ai,bi,aibi) TO '/dev/null'; COPY 10000000 Time: 9192.476 ms Profile: # Events: 9K cycles # # Overhead Command Shared Object Symbol # ........ ............... ................. ............................ # 18.24% postgres_oldint libc-2.12.1.so [.] __GI_vfprintf 8.90% postgres_oldint libc-2.12.1.so [.] _itoa_word 8.77% postgres_oldint postgres_oldint [.] CopyOneRowTo 8.19% postgres_oldint libc-2.12.1.so [.] _IO_default_xsputn_internal 3.67% postgres_oldint postgres_oldint [.] AllocSetAlloc 3.38% postgres_oldint libc-2.12.1.so [.] __strchrnul 3.24% postgres_oldint libc-2.12.1.so [.] __GI___vsprintf_chk 2.87% postgres_oldint postgres_oldint [.] heap_deform_tuple 2.49% postgres_oldint libc-2.12.1.so [.] _IO_old_init 2.25% postgres_oldint libc-2.12.1.so [.] _IO_new_file_xsputn 2.03% postgres_oldint postgres_oldint [.] appendBinaryStringInfo 1.89% postgres_oldint postgres_oldint [.] heapgettup_pagemode 1.86% postgres_oldint postgres_oldint [.] FunctionCall1 1.85% postgres_oldint postgres_oldint [.] AllocSetCheck 1.79% postgres_oldint postgres_oldint [.] enlargeStringInfo Timing after replacing those sprintf("%li", ...) calls with a quickly coded handrolled itoa: speedtest=# COPY convtest(ai,bi,aibi) TO '/dev/null'; COPY 10000000 Time: 5309.928 ms Profile: # Events: 5K cycles # # Overhead Command Shared Object Symbol # ........ ........ ................. ........................... # 14.96% postgres postgres [.] pg_s32toa 14.75% postgres postgres [.] CopyOneRowTo 5.97% postgres postgres [.] AllocSetAlloc 4.73% postgres postgres [.] heap_deform_tuple 4.54% postgres postgres [.] AllocSetCheck 4.01% postgres libc-2.12.1.so [.] _IO_new_file_xsputn 3.59% postgres postgres [.] heapgettup_pagemode 3.32% postgres postgres [.] enlargeStringInfo 3.25% postgres postgres [.] appendBinaryStringInfo 2.87% postgres postgres [.] CopySendChar 2.65% postgres postgres [.] FunctionCall1 2.44% postgres postgres [.] int4out 2.38% postgres [kernel.kallsyms] [k] copy_user_generic_string 2.30% postgres postgres [.] AllocSetReset 2.06% postgres postgres [.] pg_server_to_client 1.89% postgres libc-2.12.1.so [.] __GI_memset 1.87% postgres libc-2.12.1.so [.] memcpy A change from 9192.476ms 5309.928ms seems to be pretty good indication that a change in that area is waranted given integer columns are quite ubiquous... While at it: * I remove the outdated -- NOTE: int[24] operators never check for over/underflow! -- Some of these answers are consequently numerically incorrect. warnings in the regressions tests. * I renamed pg_[il]toa to pg_s(16|32|64)toa - I found the names confusing. Not sure if its worth it. * I added some tests for the border cases of 2^31-1 / -2^31 The 'after' profile shows obvious room for furhter improvement, but on a quick look I couldn't think of anything. Any Ideas? Andres PS: Oh, thats with assertions, but the results are comparable without them (8765.796ms vs 4561.673ms)
From 328ae1e35988f8670323b67167256e00cb5cfde7 Mon Sep 17 00:00:00 2001 From: Andres Freund <and...@anarazel.de> Date: Sun, 31 Oct 2010 21:52:08 +0100 Subject: [PATCH] Implement custom int[248]->string conversion routines out of speed reasons. While at it: * Add a few tests for int[248]out * remove some old comments about int[24] ops not checking for overflow * rename pg_[il]toa to pg_s(16|32)toa for clarities sake. --- src/backend/utils/adt/int.c | 6 +- src/backend/utils/adt/int8.c | 8 +-- src/backend/utils/adt/numutils.c | 113 +++++++++++++++++++++++++++++++----- src/include/utils/builtins.h | 6 +- src/test/regress/expected/int2.out | 15 ++++- src/test/regress/expected/int4.out | 15 ++++- src/test/regress/expected/int8.out | 13 ++++ src/test/regress/regress.c | 2 +- src/test/regress/sql/int2.sql | 6 +- src/test/regress/sql/int4.sql | 6 +- src/test/regress/sql/int8.sql | 4 + 11 files changed, 160 insertions(+), 34 deletions(-) diff --git a/src/backend/utils/adt/int.c b/src/backend/utils/adt/int.c index c450333..5340052 100644 --- a/src/backend/utils/adt/int.c +++ b/src/backend/utils/adt/int.c @@ -74,7 +74,7 @@ int2out(PG_FUNCTION_ARGS) int16 arg1 = PG_GETARG_INT16(0); char *result = (char *) palloc(7); /* sign, 5 digits, '\0' */ - pg_itoa(arg1, result); + pg_s16toa(arg1, result); PG_RETURN_CSTRING(result); } @@ -189,7 +189,7 @@ int2vectorout(PG_FUNCTION_ARGS) { if (num != 0) *rp++ = ' '; - pg_itoa(int2Array->values[num], rp); + pg_s16toa(int2Array->values[num], rp); while (*++rp != '\0') ; } @@ -293,7 +293,7 @@ int4out(PG_FUNCTION_ARGS) int32 arg1 = PG_GETARG_INT32(0); char *result = (char *) palloc(12); /* sign, 10 digits, '\0' */ - pg_ltoa(arg1, result); + pg_s32toa(arg1, result); PG_RETURN_CSTRING(result); } diff --git a/src/backend/utils/adt/int8.c b/src/backend/utils/adt/int8.c index 894110d..517b2b9 100644 --- a/src/backend/utils/adt/int8.c +++ b/src/backend/utils/adt/int8.c @@ -20,6 +20,7 @@ #include "funcapi.h" #include "libpq/pqformat.h" #include "utils/int8.h" +#include "utils/builtins.h" #define MAXINT8LEN 25 @@ -158,12 +159,9 @@ int8out(PG_FUNCTION_ARGS) { int64 val = PG_GETARG_INT64(0); char *result; - int len; - char buf[MAXINT8LEN + 1]; - - if ((len = snprintf(buf, MAXINT8LEN, INT64_FORMAT, val)) < 0) - elog(ERROR, "could not format int8"); + char buf[MAXINT8LEN + 2]; + pg_s64toa(val, buf); result = pstrdup(buf); PG_RETURN_CSTRING(result); } diff --git a/src/backend/utils/adt/numutils.c b/src/backend/utils/adt/numutils.c index 5f8083f..cd330d1 100644 --- a/src/backend/utils/adt/numutils.c +++ b/src/backend/utils/adt/numutils.c @@ -3,8 +3,6 @@ * numutils.c * utility functions for I/O of built-in numeric types. * - * integer: pg_atoi, pg_itoa, pg_ltoa - * * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * @@ -109,27 +107,112 @@ pg_atoi(char *s, int size, int c) } /* - * pg_itoa - converts a short int to its string represention + * pg_s32toa - convert a signed 16bit integer to a string representation * - * Note: - * previously based on ~ingres/source/gutil/atoi.c - * now uses vendor's sprintf conversion + * It doesnt seem worth implementing this separately. */ void -pg_itoa(int16 i, char *a) +pg_s16toa(int16 i, char *a) { - sprintf(a, "%hd", (short) i); + pg_s32toa((int32)i, a); } + /* - * pg_ltoa - converts a long int to its string represention + * pg_s32toa - convert a signed 32bit integer to a string representation * - * Note: - * previously based on ~ingres/source/gutil/atoi.c - * now uses vendor's sprintf conversion + * Its unfortunate to have this function twice - once for 32bit, once + * for 64bit, but incurring the cost of 64bit computation to 32bit + * platforms doesn't seem to be acceptable. */ void -pg_ltoa(int32 l, char *a) -{ - sprintf(a, "%d", l); +pg_s32toa(int32 value, char *buf){ + char *bufstart = buf; + int neg = 0; + + //Avoid problems with the most negative not being representable as + //a positive number + if(value == INT32_MIN){ + memcpy(buf, "-2147483648\0", 12); + return; + } + else if(value < 0){ + value = -value; + neg = 1; + } + + do{ + int32 oldval = value; + /* + * division by constants can be optimized by some modern + * compilers (including gcc). We could add the concrete, + * optimized, calculatation here to be fast at -O0 and/or + * other compilers... Not sure if its worth doing. + */ + value /= 10; + *buf++ = '0' + (oldval - value * 10); + } + while(value != 0); + + if(neg){ + *buf++ = '-'; + } + + //have to reorder the string, but not 0byte. + *buf-- = 0; + + while(bufstart < buf){ + char swap = *bufstart; + *bufstart++ = *buf; + *buf-- = swap; + } +} + +/* + * pg_s64toa - convert a signed 64bit integer to a string representation + */ +void pg_s64toa(int64 value, char *buf){ + char *bufstart = buf; + int neg = 0; + + //Avoid problems with the most negative not being representable as + //a positive number + if(value == INT64_MIN){ + memcpy(buf, "-9223372036854775808\0", 21); + return; + } + else if(value < 0){ + value = -value; + neg = 1; + } + + do{ + int64 oldval = value; + /* + * division by constants can be optimized by some modern + * compilers (including gcc). We could add the concrete, + * optimized, calculatation here to be fast at -O0 and/or + * other compilers... Not sure if its worth doing. + * Its something like: + * + */ + value /= 10; + *buf++ = '0' + (oldval - value * 10); + } + while(value != 0); + + if(neg){ + *buf++ = '-'; + } + + //have to reorder the string, but not 0byte. + *buf-- = 0; + + while(bufstart < buf){ + char swap = *bufstart; + *bufstart = *buf; + *buf = swap; + buf--; + bufstart++; + } } diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h index f4b2a96..ba83ef2 100644 --- a/src/include/utils/builtins.h +++ b/src/include/utils/builtins.h @@ -273,8 +273,10 @@ extern Datum current_schemas(PG_FUNCTION_ARGS); /* numutils.c */ extern int32 pg_atoi(char *s, int size, int c); -extern void pg_itoa(int16 i, char *a); -extern void pg_ltoa(int32 l, char *a); + +extern void pg_s16toa(int16 l, char *a); +extern void pg_s32toa(int32 l, char *a); +extern void pg_s64toa(int64 l, char *a); /* * Per-opclass comparison functions for new btrees. These are diff --git a/src/test/regress/expected/int2.out b/src/test/regress/expected/int2.out index 61ac956..3bb26b3 100644 --- a/src/test/regress/expected/int2.out +++ b/src/test/regress/expected/int2.out @@ -1,7 +1,5 @@ -- -- INT2 --- NOTE: int2 operators never check for over/underflow! --- Some of these answers are consequently numerically incorrect. -- CREATE TABLE INT2_TBL(f1 int2); INSERT INTO INT2_TBL(f1) VALUES ('0 '); @@ -244,3 +242,16 @@ SELECT '' AS five, i.f1, i.f1 / int4 '2' AS x FROM INT2_TBL i; | -32767 | -16383 (5 rows) +-- corner cases +SELECT (1<<15-1)::int2::text; + text +------- + 16384 +(1 row) + +SELECT (-1<<15)::int2::text; + text +-------- + -32768 +(1 row) + diff --git a/src/test/regress/expected/int4.out b/src/test/regress/expected/int4.out index a21bbda..42095c7 100644 --- a/src/test/regress/expected/int4.out +++ b/src/test/regress/expected/int4.out @@ -1,7 +1,5 @@ -- -- INT4 --- WARNING: int4 operators never check for over/underflow! --- Some of these answers are consequently numerically incorrect. -- CREATE TABLE INT4_TBL(f1 int4); INSERT INTO INT4_TBL(f1) VALUES (' 0 '); @@ -331,3 +329,16 @@ SELECT (2 + 2) / 2 AS two; 2 (1 row) +-- corner cases +SELECT (1<<31-1)::int4::text; + text +------------ + 1073741824 +(1 row) + +SELECT (1<<31)::int4::text; + text +------------- + -2147483648 +(1 row) + diff --git a/src/test/regress/expected/int8.out b/src/test/regress/expected/int8.out index c8e2dad..e156067 100644 --- a/src/test/regress/expected/int8.out +++ b/src/test/regress/expected/int8.out @@ -802,3 +802,16 @@ SELECT * FROM generate_series('+4567890123456789'::int8, '+4567890123456799'::in 4567890123456799 (6 rows) +-- corner cases +SELECT (1<<63-1)::int8::text; + text +------------ + 1073741824 +(1 row) + +SELECT (1<<63)::int8::text; + text +------------- + -2147483648 +(1 row) + diff --git a/src/test/regress/regress.c b/src/test/regress/regress.c index 8e4286a..e14c985 100644 --- a/src/test/regress/regress.c +++ b/src/test/regress/regress.c @@ -729,7 +729,7 @@ int44out(PG_FUNCTION_ARGS) walk = result; for (i = 0; i < 4; i++) { - pg_ltoa(an_array[i], walk); + pg_s32toa(an_array[i], walk); while (*++walk != '\0') ; *walk++ = ' '; diff --git a/src/test/regress/sql/int2.sql b/src/test/regress/sql/int2.sql index bf4efba..8bffe53 100644 --- a/src/test/regress/sql/int2.sql +++ b/src/test/regress/sql/int2.sql @@ -1,7 +1,5 @@ -- -- INT2 --- NOTE: int2 operators never check for over/underflow! --- Some of these answers are consequently numerically incorrect. -- CREATE TABLE INT2_TBL(f1 int2); @@ -85,3 +83,7 @@ SELECT '' AS five, i.f1, i.f1 - int4 '2' AS x FROM INT2_TBL i; SELECT '' AS five, i.f1, i.f1 / int2 '2' AS x FROM INT2_TBL i; SELECT '' AS five, i.f1, i.f1 / int4 '2' AS x FROM INT2_TBL i; + +-- corner cases +SELECT (1<<15-1)::int2::text; +SELECT (-1<<15)::int2::text; diff --git a/src/test/regress/sql/int4.sql b/src/test/regress/sql/int4.sql index 5212c68..39bfec1 100644 --- a/src/test/regress/sql/int4.sql +++ b/src/test/regress/sql/int4.sql @@ -1,7 +1,5 @@ -- -- INT4 --- WARNING: int4 operators never check for over/underflow! --- Some of these answers are consequently numerically incorrect. -- CREATE TABLE INT4_TBL(f1 int4); @@ -125,3 +123,7 @@ SELECT 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 AS ten; SELECT 2 + 2 / 2 AS three; SELECT (2 + 2) / 2 AS two; + +-- corner cases +SELECT (1<<31-1)::int4::text; +SELECT (1<<31)::int4::text; diff --git a/src/test/regress/sql/int8.sql b/src/test/regress/sql/int8.sql index 648563c..7fff03c 100644 --- a/src/test/regress/sql/int8.sql +++ b/src/test/regress/sql/int8.sql @@ -190,3 +190,7 @@ SELECT q1, q1 << 2 AS "shl", q1 >> 3 AS "shr" FROM INT8_TBL; SELECT * FROM generate_series('+4567890123456789'::int8, '+4567890123456799'::int8); SELECT * FROM generate_series('+4567890123456789'::int8, '+4567890123456799'::int8, 0); SELECT * FROM generate_series('+4567890123456789'::int8, '+4567890123456799'::int8, 2); + +-- corner cases +SELECT (1<<63-1)::int8::text; +SELECT (1<<63)::int8::text; -- 1.7.3.rc1.5.g73aa2
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers