Hi, I started to analyze XLogInsert because it was the major bottleneck when creating some materialized view/cached tables/whatever. Analyzing it I could see that content of the COMP_CRC32 macro was taking most of the time which isn't immediately obvious when you profile because it obviously doesn't show up as a separate function. I first put it into functions to make it easier to profile. I couldn't measure any difference for COPY, CTAS and a simple pgbench run on 3 kinds of hardware (Core2, older Xeon, older Sparc systems).
I looked a bit around for faster implementations of CRC32 and found one in zlib. After adapting it (pg uses slightly different computation (non- inverted)) I found that it increases the speed of the CRC32 calculation itself 3 fold. It does that by not only using one lookup table but four (one for each byte of a word). Those four calculations are independent and thus are considerably faster on somewhat recent hardware. Also it does memory lookups in 4 byte steps instead of 1 byte as the pg version (thats only about ~8% benefit in itself). I wrote a preliminary patch which includes both, the original implementation and the new one switchable via an #define. I tested performance differences in a small number of scenarios: - CTAS/INSERT ... SELECT (8-30%) - COPY (3-20%) - pgbench (no real difference unless directly after a checkpoint) Setup: CREATE TABLE blub (ai int, bi int, aibi int); CREATE TABLE speedtest (ai int, bi int, aibi int); INSERT ... SELECT: Statement: INSERT INTO blub SELECT a.i, b.i, a.i *b.i FROM generate_series(1, 10000) a(i), generate_series(1, 1000) b(i); legacy crc: 11526.588 11406.518 11412.182 11430.245 zlib: 9977.394 9945.408 9840.907 9842.875 COPY: Statement: ('blub' enlarged here 4 times, as otherwise the variances were to large) COPY blub TO '/tmp/b' BINARY; ... CHECKPOINT;TRUNCATE speedtest; COPY speedtest FROM '/tmp/b' BINARY; legacy: 44835.840 44832.876 zlib: 39530.549 39365.109 39295.167 The performance differences are bigger if the table rows are significantly bigger. Do you think something like that is sensible? If yes, I will make it into a proper patch and such. Thanks, Andres INSERT ... SELECT profile before patch: 20.22% postgres postgres [.] comp_crc32 5.77% postgres postgres [.] XLogInsert 5.55% postgres postgres [.] LWLockAcquire 5.21% postgres [kernel. [k] copy_user_generic_string 4.64% postgres postgres [.] LWLockRelease 4.39% postgres postgres [.] ReadBuffer_common 2.75% postgres postgres [.] heap_insert 2.22% postgres libc-2.1 [.] memcpy 2.09% postgres postgres [.] UnlockReleaseBuffer 1.85% postgres postgres [.] hash_any 1.77% postgres [kernel. [k] clear_page_c 1.69% postgres postgres [.] hash_search_with_hash_value 1.61% postgres postgres [.] heapgettup_pagemode 1.50% postgres postgres [.] PageAddItem 1.42% postgres postgres [.] MarkBufferDirty 1.28% postgres postgres [.] RelationGetBufferForTuple 1.15% postgres postgres [.] ExecModifyTable 1.06% postgres postgres [.] RelationPutHeapTuple After: 9.97% postgres postgres [.] comp_crc32 5.95% postgres [kernel. [k] copy_user_generic_string 5.94% postgres postgres [.] LWLockAcquire 5.64% postgres postgres [.] XLogInsert 5.11% postgres postgres [.] LWLockRelease 4.63% postgres postgres [.] ReadBuffer_common 3.45% postgres postgres [.] heap_insert 2.54% postgres libc-2.1 [.] memcpy 2.03% postgres postgres [.] UnlockReleaseBuffer 1.94% postgres postgres [.] hash_search_with_hash_value 1.84% postgres postgres [.] hash_any 1.73% postgres [kernel. [k] clear_page_c 1.68% postgres postgres [.] PageAddItem 1.62% postgres postgres [.] heapgettup_pagemode 1.52% postgres postgres [.] RelationGetBufferForTuple 1.47% postgres postgres [.] MarkBufferDirty 1.30% postgres postgres [.] ExecModifyTable 1.23% postgres postgres [.] RelationPutHeapTuple
From f8ec18769e581cf039535730d2088466c461d8f6 Mon Sep 17 00:00:00 2001 From: Andres Freund <and...@anarazel.de> Date: Thu, 29 Apr 2010 22:19:08 +0200 Subject: [PATCH] Preliminary patch using an improved out of line crc32 computation for the xlog. --- src/backend/access/transam/xlog.c | 66 +++++++++--------- src/backend/utils/hash/pg_crc.c | 142 ++++++++++++++++++++++++++++++++++++- src/include/utils/pg_crc.h | 9 ++- 3 files changed, 180 insertions(+), 37 deletions(-) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 992a337..ffb9fc4 100644 *** a/src/backend/access/transam/xlog.c --- b/src/backend/access/transam/xlog.c *************** begin:; *** 700,706 **** { /* Simple data, just include it */ len += rdt->len; ! COMP_CRC32(rdata_crc, rdt->data, rdt->len); } else { --- 700,706 ---- { /* Simple data, just include it */ len += rdt->len; ! rdata_crc = comp_crc32(rdata_crc, rdt->data, rdt->len); } else { *************** begin:; *** 715,721 **** else if (rdt->data) { len += rdt->len; ! COMP_CRC32(rdata_crc, rdt->data, rdt->len); } break; } --- 715,721 ---- else if (rdt->data) { len += rdt->len; ! rdata_crc = comp_crc32(rdata_crc, rdt->data, rdt->len); } break; } *************** begin:; *** 732,738 **** else if (rdt->data) { len += rdt->len; ! COMP_CRC32(rdata_crc, rdt->data, rdt->len); } break; } --- 732,738 ---- else if (rdt->data) { len += rdt->len; ! rdata_crc = comp_crc32(rdata_crc, rdt->data, rdt->len); } break; } *************** begin:; *** 757,781 **** BkpBlock *bkpb = &(dtbuf_xlg[i]); char *page; ! COMP_CRC32(rdata_crc, ! (char *) bkpb, ! sizeof(BkpBlock)); page = (char *) BufferGetBlock(dtbuf[i]); if (bkpb->hole_length == 0) { ! COMP_CRC32(rdata_crc, ! page, ! BLCKSZ); } else { /* must skip the hole */ ! COMP_CRC32(rdata_crc, ! page, ! bkpb->hole_offset); ! COMP_CRC32(rdata_crc, ! page + (bkpb->hole_offset + bkpb->hole_length), ! BLCKSZ - (bkpb->hole_offset + bkpb->hole_length)); } } } --- 757,781 ---- BkpBlock *bkpb = &(dtbuf_xlg[i]); char *page; ! rdata_crc = comp_crc32(rdata_crc, ! (char *) bkpb, ! sizeof(BkpBlock)); page = (char *) BufferGetBlock(dtbuf[i]); if (bkpb->hole_length == 0) { ! rdata_crc = comp_crc32(rdata_crc, ! page, ! BLCKSZ); } else { /* must skip the hole */ ! rdata_crc = comp_crc32(rdata_crc, ! page, ! bkpb->hole_offset); ! rdata_crc = comp_crc32(rdata_crc, ! page + (bkpb->hole_offset + bkpb->hole_length), ! BLCKSZ - (bkpb->hole_offset + bkpb->hole_length)); } } } *************** begin:; *** 980,987 **** record->xl_rmid = rmid; /* Now we can finish computing the record's CRC */ ! COMP_CRC32(rdata_crc, (char *) record + sizeof(pg_crc32), ! SizeOfXLogRecord - sizeof(pg_crc32)); FIN_CRC32(rdata_crc); record->xl_crc = rdata_crc; --- 980,987 ---- record->xl_rmid = rmid; /* Now we can finish computing the record's CRC */ ! rdata_crc = comp_crc32(rdata_crc, (char *) record + sizeof(pg_crc32), ! SizeOfXLogRecord - sizeof(pg_crc32)); FIN_CRC32(rdata_crc); record->xl_crc = rdata_crc; *************** RecordIsValid(XLogRecord *record, XLogRe *** 3550,3556 **** /* First the rmgr data */ INIT_CRC32(crc); ! COMP_CRC32(crc, XLogRecGetData(record), len); /* Add in the backup blocks, if any */ blk = (char *) XLogRecGetData(record) + len; --- 3550,3556 ---- /* First the rmgr data */ INIT_CRC32(crc); ! crc = comp_crc32(crc, XLogRecGetData(record), len); /* Add in the backup blocks, if any */ blk = (char *) XLogRecGetData(record) + len; *************** RecordIsValid(XLogRecord *record, XLogRe *** 3570,3576 **** return false; } blen = sizeof(BkpBlock) + BLCKSZ - bkpb.hole_length; ! COMP_CRC32(crc, blk, blen); blk += blen; } --- 3570,3576 ---- return false; } blen = sizeof(BkpBlock) + BLCKSZ - bkpb.hole_length; ! crc = comp_crc32(crc, blk, blen); blk += blen; } *************** RecordIsValid(XLogRecord *record, XLogRe *** 3584,3591 **** } /* Finally include the record header */ ! COMP_CRC32(crc, (char *) record + sizeof(pg_crc32), ! SizeOfXLogRecord - sizeof(pg_crc32)); FIN_CRC32(crc); if (!EQ_CRC32(record->xl_crc, crc)) --- 3584,3591 ---- } /* Finally include the record header */ ! crc = comp_crc32(crc, (char *) record + sizeof(pg_crc32), ! SizeOfXLogRecord - sizeof(pg_crc32)); FIN_CRC32(crc); if (!EQ_CRC32(record->xl_crc, crc)) *************** WriteControlFile(void) *** 4450,4458 **** /* Contents are protected with a CRC */ INIT_CRC32(ControlFile->crc); ! COMP_CRC32(ControlFile->crc, ! (char *) ControlFile, ! offsetof(ControlFileData, crc)); FIN_CRC32(ControlFile->crc); /* --- 4450,4458 ---- /* Contents are protected with a CRC */ INIT_CRC32(ControlFile->crc); ! ControlFile->crc = comp_crc32(ControlFile->crc, ! (char *) ControlFile, ! offsetof(ControlFileData, crc)); FIN_CRC32(ControlFile->crc); /* *************** ReadControlFile(void) *** 4550,4558 **** /* Now check the CRC. */ INIT_CRC32(crc); ! COMP_CRC32(crc, ! (char *) ControlFile, ! offsetof(ControlFileData, crc)); FIN_CRC32(crc); if (!EQ_CRC32(crc, ControlFile->crc)) --- 4550,4558 ---- /* Now check the CRC. */ INIT_CRC32(crc); ! crc = comp_crc32(crc, ! (char *) ControlFile, ! offsetof(ControlFileData, crc)); FIN_CRC32(crc); if (!EQ_CRC32(crc, ControlFile->crc)) *************** UpdateControlFile(void) *** 4688,4696 **** int fd; INIT_CRC32(ControlFile->crc); ! COMP_CRC32(ControlFile->crc, ! (char *) ControlFile, ! offsetof(ControlFileData, crc)); FIN_CRC32(ControlFile->crc); fd = BasicOpenFile(XLOG_CONTROL_FILE, --- 4688,4696 ---- int fd; INIT_CRC32(ControlFile->crc); ! ControlFile->crc = comp_crc32(ControlFile->crc, ! (char *) ControlFile, ! offsetof(ControlFileData, crc)); FIN_CRC32(ControlFile->crc); fd = BasicOpenFile(XLOG_CONTROL_FILE, *************** BootStrapXLOG(void) *** 4900,4908 **** memcpy(XLogRecGetData(record), &checkPoint, sizeof(checkPoint)); INIT_CRC32(crc); ! COMP_CRC32(crc, &checkPoint, sizeof(checkPoint)); ! COMP_CRC32(crc, (char *) record + sizeof(pg_crc32), ! SizeOfXLogRecord - sizeof(pg_crc32)); FIN_CRC32(crc); record->xl_crc = crc; --- 4900,4908 ---- memcpy(XLogRecGetData(record), &checkPoint, sizeof(checkPoint)); INIT_CRC32(crc); ! crc = comp_crc32(crc, &checkPoint, sizeof(checkPoint)); ! crc = comp_crc32(crc, (char *) record + sizeof(pg_crc32), ! SizeOfXLogRecord - sizeof(pg_crc32)); FIN_CRC32(crc); record->xl_crc = crc; diff --git a/src/backend/utils/hash/pg_crc.c b/src/backend/utils/hash/pg_crc.c index 2ef62e5..32ba6f6 100644 *** a/src/backend/utils/hash/pg_crc.c --- b/src/backend/utils/hash/pg_crc.c *************** *** 26,32 **** /* Use c.h so that this file can be built in either frontend or backend */ #include "c.h" ! /* * This table is based on the polynomial --- 26,32 ---- /* Use c.h so that this file can be built in either frontend or backend */ #include "c.h" ! #include "utils/pg_crc.h" /* * This table is based on the polynomial *************** const uint32 pg_crc32_table[256] = { *** 100,105 **** --- 100,245 ---- 0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D }; + /* + * 1: pg version + * 2: adapted zlib version + */ + #define CRC_VERSION 2 + #if CRC_VERSION == 1 + + /* Accumulate some (more) bytes into a CRC */ + pg_crc32 comp_crc32(pg_crc32 crc, const void *data, uint32 len){ + pg_crc32 start = crc; + uint32 start_len = len; + do { + unsigned char *__data = (unsigned char *) (data); + while (len-- > 0){ + int __tab_index = ((int) ((crc) >> 24) ^ *__data++) & 0xFF; + crc = pg_crc32_table[__tab_index] ^ ((crc) << 8); + } + } while (0); + + return crc; + } + + + #elif CRC_VERSION == 2 + + static inline uint32 swab32(const uint32 x); + static inline uint32 swab32(const uint32 x){ + return ((x & (uint32)0x000000ffUL) << 24) | + ((x & (uint32)0x0000ff00UL) << 8) | + ((x & (uint32)0x00ff0000UL) >> 8) | + ((x & (uint32)0xff000000UL) >> 24); + } + + #if defined __BIG_ENDIAN__ + #define cpu_to_be32(x) + #else + #define cpu_to_be32(x) swab32(x) + #endif + + #define unlikely(x) __builtin_expect(!!(x), 0) + #define likely(x) __builtin_expect(!!(x), 1) + + static pg_crc32 crc_table[4][256]; + + static void prepare(){ + /* generate crc for each value followed by one, two, and three zeros*/ + int n; + int k; + pg_crc32 c; + + for (n = 0; n < 256; n++) { + c = pg_crc32_table[n]; + crc_table[0][n] = c; + } + + for (n = 0; n < 256; n++) { + c = crc_table[0][n]; + for (k = 1; k < 4; k++) { + //orig: + //c = crc_table[0][c & 0xff] ^ (c >> 8); + //pg compliant + c = crc_table[0][(c >> 24)] ^ (c << 8); + crc_table[k][n] = c; + } + } + } + + + /* ========================================================================= */ + #define DOCRC4 d = *buf4++; c ^= cpu_to_be32(d); \ + c = crc_table[0][c & 0xff] ^ crc_table[1][(c >> 8) & 0xff] ^ \ + crc_table[2][(c >> 16) & 0xff] ^ crc_table[3][c >> 24] + #define DOCRC32 DOCRC4; DOCRC4; DOCRC4; DOCRC4; DOCRC4; DOCRC4; DOCRC4; DOCRC4 + + /* ========================================================================= */ + pg_crc32 comp_crc32(pg_crc32 crc, const void *data2, uint32 len) + { + /* + * XXX: That table should likely be precomputed at compile time, + * but for now I didnt bother. + */ + static int initialized = 0; + if(!initialized){ + prepare(); + initialized = 1; + } + + unsigned char *data = (unsigned char *) (data2); + uint32 c = crc; + const uint32 *buf4; + uint32 d; + + /* + * If the input is not on a 4byte boundary, align it. Its dubious + * if we actually need that, but the costs of that check should be + * marginal. + */ + while (unlikely(len && ((ptrdiff_t)data & 3))) { + //printf("align\n"); + c = crc_table[0][((c >> 24) ^ *data++) & 0xff] ^ (c << 8); + len--; + } + + + buf4 = (const uint32 *)(const void *)data; + /* + * manually unrolling the loop seems to be faster on most + * hardware/compiler combination I tried. + */ + while (likely(len >= 32)) { + DOCRC32; + len -= 32; + //printf("crc: %x after bigstep\n", c); + } + + /* + * There very validly might be input which is not a multiple of 32byte. + */ + while (len >= 4) { + DOCRC4; + len -= 4; + //printf("crc: %x after step2\n", c); + } + + data = (unsigned char *)buf4; + /* + * Computed unaligned end of data + */ + if (unlikely(len)){ + //printf("unalign\n"); + do { + c = crc_table[0][((c >> 24) ^ *data++) & 0xff] ^ (c << 8); + } + while (--len); + } + return c; + } + #undef DOCRC4 + #undef DOCRC32 + #endif #ifdef PROVIDE_64BIT_CRC diff --git a/src/include/utils/pg_crc.h b/src/include/utils/pg_crc.h index 3e34d62..32de738 100644 *** a/src/include/utils/pg_crc.h --- b/src/include/utils/pg_crc.h *************** *** 31,36 **** --- 31,42 ---- typedef uint32 pg_crc32; + extern pg_crc32 comp_crc32(pg_crc32 crc, const void *data, uint32 len); + + + /* Constant table for CRC calculation */ + extern CRCDLLIMPORT const uint32 pg_crc32_table[]; + /* Initialize a CRC accumulator */ #define INIT_CRC32(crc) ((crc) = 0xFFFFFFFF) *************** do { \ *** 53,61 **** /* Check for equality of two CRCs */ #define EQ_CRC32(c1,c2) ((c1) == (c2)) - /* Constant table for CRC calculation */ - extern CRCDLLIMPORT const uint32 pg_crc32_table[]; - #ifdef PROVIDE_64BIT_CRC --- 59,64 ---- -- 1.6.5.12.gd65df24
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers