Gregory Stark wrote:
> I think we're talking past each other. Martin and I are talking about doing
> something like:
>
> for (...)
> ...
> crc(word including hint bits)
> ...
> for (each line pointer)
> crc-negated(word & LP_DEAD<<15)
>
> Because CRC is a cyclic checksum it's possible to add or remove bits
> incrementally.
I see. Since our CRC implementation is a simple byte loop, and since ItemIdData fits in a uint32, the attached patch should do mostly the same by copying the line pointer into a uint32, masking out the lp_flags bits, and feeding the modified copy to the CRC. This patch also skips the special space (pd_special) and the unused area of the page.

I'm still testing this; please be aware that it likely has an even higher bug density than my regular patches (and it still contains some debugging printouts).

While reading the pg_filedump code I noticed that there's a way to tell the different index pages apart, so perhaps we can use that to be able to checksum the special space as well.

--
Alvaro Herrera                                http://www.CommandPrompt.com/
PostgreSQL Replication, Consulting, Custom Development, 24x7 support
Index: src/backend/access/heap/heapam.c =================================================================== RCS file: /home/alvherre/Code/cvs/pgsql/src/backend/access/heap/heapam.c,v retrieving revision 1.269 diff -c -p -r1.269 heapam.c *** src/backend/access/heap/heapam.c 6 Nov 2008 20:51:14 -0000 1.269 --- src/backend/access/heap/heapam.c 13 Nov 2008 17:44:23 -0000 *************** *** 4036,4041 **** --- 4036,4128 ---- } /* + * Perform XLogInsert for hint bits changes in a page. This handles hint + * bits set in HeapTupleHeaderData (t_infomask and t_infomask2). + * + * This is intended to be called right before writing a page from shared + * buffers to disk. + * + * The approach used here, instead of WAL-logging every change, is to produce + * a complete record of the current state of hint bits in a page just before + * flushing it. There are two downsides to this approach: first, it stores + * all hint bits in the page, not only those that changed; and second, that + * the flusher of the page needs to flush a lot more of the WAL (namely up + * to this new record's LSN) than the original LSN marked on the page. 
+ */ + XLogRecPtr + log_hintbits(RelFileNode *rnode, ForkNumber forkNum, BlockNumber blkno, + Page page) + { + xl_heap_hintbits xlrec; + OffsetNumber i; + XLogRecPtr recptr; + XLogRecData rdata[2]; + char *bits; + int pos = 0; + StringInfoData buf; + + /* + * 1 byte for line pointer bits, 2 bytes for infomask, + * 2 bytes for infomask2 + */ + bits = palloc(MaxHeapTuplesPerPage * 5); + + initStringInfo(&buf); + appendStringInfo(&buf, "page %u: ", blkno); + + for (i = FirstOffsetNumber; i <= PageGetMaxOffsetNumber(page); + i = OffsetNumberNext(i)) + { + HeapTupleHeader htup; + ItemId lp = PageGetItemId(page, i); + + if (!ItemIdHasStorage(lp)) + continue; + + appendStringInfo(&buf, "offset %d: ", i); + + htup = (HeapTupleHeader) PageGetItem(page, lp); + + *((uint16 *) (bits + pos)) = htup->t_infomask & HEAP_XACT_MASK; + appendStringInfo(&buf, "infomask %04x/%04x ", htup->t_infomask, + htup->t_infomask & HEAP_XACT_MASK); + pos += 2; + *((uint16 *) (bits + pos)) = htup->t_infomask2 & HEAP2_XACT_MASK; + appendStringInfo(&buf, "infomask2 %04x/%04x\n", htup->t_infomask2, + htup->t_infomask2 & HEAP2_XACT_MASK); + pos += 2; + } + + elog(LOG, "%s", buf.data); + pfree(buf.data); + + /* NO ELOG(ERROR) from here till hint bits are logged */ + START_CRIT_SECTION(); + + xlrec.node = *rnode; + xlrec.block = blkno; + + rdata[0].data = (char *) &xlrec; + rdata[0].len = SizeOfHeapHintbits; + rdata[0].buffer = InvalidBuffer; + rdata[0].next = &(rdata[1]); + + rdata[1].data = (char *) bits; + rdata[1].len = pos; + rdata[1].buffer = InvalidBuffer; + rdata[1].next = NULL; + + recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_HINTBITS, rdata); + + PageSetLSN(page, recptr); + PageSetTLI(page, ThisTimeLineID); + + END_CRIT_SECTION(); + + return recptr; + } + + /* * Handles CLEAN and CLEAN_MOVE record types */ static void *************** *** 4153,4158 **** --- 4240,4324 ---- } static void + heap_xlog_hintbits(XLogRecPtr lsn, XLogRecord *record) + { + xl_heap_hintbits *xlrec = (xl_heap_hintbits *) 
XLogRecGetData(record); + Buffer buffer; + Page page; + + buffer = XLogReadBuffer(xlrec->node, xlrec->block, false); + if (!BufferIsValid(buffer)) + return; + page = (Page) BufferGetPage(buffer); + + if (XLByteLE(lsn, PageGetLSN(page))) + { + UnlockReleaseBuffer(buffer); + return; + } + + if (record->xl_len > SizeOfHeapHintbits) + { + char *bits; + char *bits_end; + OffsetNumber offset = FirstOffsetNumber; + StringInfoData buf; + + + bits = (char *) xlrec + SizeOfHeapHintbits; + bits_end = (char *) xlrec + record->xl_len; + + initStringInfo(&buf); + appendStringInfo(&buf, "page %u: ", xlrec->block); + + while (bits < bits_end) + { + + for (;;) + { + HeapTupleHeader htup; + ItemId lp = PageGetItemId(page, offset); + + if (!ItemIdHasStorage(lp)) + { + offset++; + continue; + } + + appendStringInfo(&buf, "offset %d: ", offset); + + htup = (HeapTupleHeader) PageGetItem(page, lp); + + /* set the right bits in infomask */ + htup->t_infomask = *(uint16 *) bits | + (htup->t_infomask & ~HEAP_XACT_MASK); + appendStringInfo(&buf, "infomask %04x/%04x ", htup->t_infomask, + *(uint16 *) bits); + bits += 2; + + /* set the right bits in infomask2 */ + htup->t_infomask2 = *(uint16 *) bits | + (htup->t_infomask2 & ~HEAP2_XACT_MASK); + appendStringInfo(&buf, "infomask2 %04x/%04x\n", htup->t_infomask2, + *(uint16 *) bits); + bits += 2; + + offset++; + + break; + } + } + elog(LOG, "%s", buf.data); + pfree(buf.data); + } + + PageSetLSN(page, lsn); + PageSetTLI(page, ThisTimeLineID); + MarkBufferDirty(buffer); + UnlockReleaseBuffer(buffer); + } + + static void heap_xlog_newpage(XLogRecPtr lsn, XLogRecord *record) { xl_heap_newpage *xlrec = (xl_heap_newpage *) XLogRecGetData(record); *************** *** 4692,4697 **** --- 4858,4866 ---- case XLOG_HEAP2_CLEAN_MOVE: heap_xlog_clean(lsn, record, true); break; + case XLOG_HEAP2_HINTBITS: + heap_xlog_hintbits(lsn, record); + break; default: elog(PANIC, "heap2_redo: unknown op code %u", info); } *************** *** 4833,4838 **** --- 5002,5015 
---- xlrec->node.spcNode, xlrec->node.dbNode, xlrec->node.relNode, xlrec->block); } + else if (info == XLOG_HEAP2_HINTBITS) + { + xl_heap_hintbits *xlrec = (xl_heap_hintbits *) rec; + + appendStringInfo(buf, "hintbits: rel %u/%u/%u; blk %u", + xlrec->node.spcNode, xlrec->node.dbNode, + xlrec->node.relNode, xlrec->block); + } else appendStringInfo(buf, "UNKNOWN"); } Index: src/backend/storage/buffer/bufmgr.c =================================================================== RCS file: /home/alvherre/Code/cvs/pgsql/src/backend/storage/buffer/bufmgr.c,v retrieving revision 1.241 diff -c -p -r1.241 bufmgr.c *** src/backend/storage/buffer/bufmgr.c 11 Nov 2008 13:19:16 -0000 1.241 --- src/backend/storage/buffer/bufmgr.c 13 Nov 2008 17:44:23 -0000 *************** *** 33,38 **** --- 33,39 ---- #include <sys/file.h> #include <unistd.h> + #include "access/heapam.h" #include "catalog/catalog.h" #include "miscadmin.h" #include "pg_trace.h" *************** *** 43,48 **** --- 44,50 ---- #include "storage/ipc.h" #include "storage/proc.h" #include "storage/smgr.h" + #include "utils/memutils.h" #include "utils/rel.h" #include "utils/resowner.h" *************** *** 1461,1467 **** * BUF_REUSABLE: buffer is available for replacement, ie, it has * pin count 0 and usage count 0. * ! * (BUF_WRITTEN could be set in error if FlushBuffers finds the buffer clean * after locking it, but we don't care all that much.) * * Note: caller must have done ResourceOwnerEnlargeBuffers. --- 1463,1469 ---- * BUF_REUSABLE: buffer is available for replacement, ie, it has * pin count 0 and usage count 0. * ! * (BUF_WRITTEN could be set in error if FlushBuffer finds the buffer clean * after locking it, but we don't care all that much.) * * Note: caller must have done ResourceOwnerEnlargeBuffers. 
*************** *** 1772,1777 **** --- 1774,1787 ---- { XLogRecPtr recptr; ErrorContextCallback errcontext; + static char *dblbuf = NULL; + bool done = false; + + if (enable_block_checksums && dblbuf == NULL) + { + dblbuf = MemoryContextAlloc(TopMemoryContext, BLCKSZ + ALIGNOF_BUFFER); + dblbuf = (char *) BUFFERALIGN(dblbuf); + } /* * Acquire the buffer's io_in_progress lock. If StartBufferIO returns *************** *** 1796,1801 **** --- 1806,1835 ---- reln->smgr_rnode.relNode); /* + * We make a copy of the buffer to write. + */ + if (enable_block_checksums) + memcpy(dblbuf, BufHdrGetBlock(buf), BLCKSZ); + + /* + * If the page has been modified by a hint bit setter, ensure we WAL-log + * their changes before actually writing the page; otherwise the CRC we're + * about to store could be invalid if the page is torn. Note: we check + * the flag on the shared-memory copy of the buffer, not the private copy + * we just made, to forestall the possibility that hints bits could have + * been set in the later parts of the page after we copied the flag in + * unset state. + */ + if (enable_block_checksums && PageHasUnloggedChange(BufHdrGetBlock(buf)) && + !InRecovery) + { + /* XXX cast away the "volatile" qualifier */ + log_hintbits(&((BufferDesc *) buf)->tag.rnode, buf->tag.forkNum, + buf->tag.blockNum, BufHdrGetBlock(buf)); + done = true; + } + + /* * Force XLOG flush up to buffer's LSN. This implements the basic WAL * rule that log updates must hit disk before any of the data-file changes * they describe do. *************** *** 1817,1823 **** smgrwrite(reln, buf->tag.forkNum, buf->tag.blockNum, ! (char *) BufHdrGetBlock(buf), false); BufferFlushCount++; --- 1851,1857 ---- smgrwrite(reln, buf->tag.forkNum, buf->tag.blockNum, ! enable_block_checksums ? 
dblbuf : BufHdrGetBlock(buf), false); BufferFlushCount++; Index: src/backend/storage/page/bufpage.c =================================================================== RCS file: /home/alvherre/Code/cvs/pgsql/src/backend/storage/page/bufpage.c,v retrieving revision 1.81 diff -c -p -r1.81 bufpage.c *** src/backend/storage/page/bufpage.c 3 Nov 2008 20:47:48 -0000 1.81 --- src/backend/storage/page/bufpage.c 13 Nov 2008 17:44:23 -0000 *************** *** 41,46 **** --- 41,47 ---- MemSet(p, 0, pageSize); /* p->pd_flags = 0; done by above MemSet */ + p->pd_checksum = PAGE_INVALID_CHECKSUM; p->pd_lower = SizeOfPageHeaderData; p->pd_upper = pageSize - specialSize; p->pd_special = pageSize - specialSize; *************** *** 84,92 **** page->pd_special == MAXALIGN(page->pd_special)) return true; ! /* Check all-zeroes case */ pagebytes = (char *) page; ! for (i = 0; i < BLCKSZ; i++) { if (pagebytes[i] != 0) return false; --- 85,93 ---- page->pd_special == MAXALIGN(page->pd_special)) return true; ! /* Check all-zeroes case (skipping the checksum) */ pagebytes = (char *) page; ! for (i = sizeof(PAGE_CHECKSUM_TYPE); i < BLCKSZ; i++) { if (pagebytes[i] != 0) return false; Index: src/backend/storage/smgr/smgr.c =================================================================== RCS file: /home/alvherre/Code/cvs/pgsql/src/backend/storage/smgr/smgr.c,v retrieving revision 1.113 diff -c -p -r1.113 smgr.c *** src/backend/storage/smgr/smgr.c 11 Nov 2008 13:19:16 -0000 1.113 --- src/backend/storage/smgr/smgr.c 13 Nov 2008 17:44:23 -0000 *************** *** 28,33 **** --- 28,36 ---- #include "utils/memutils.h" + /* Perform block checksumming for corruption detection */ + bool enable_block_checksums = false; + /* * This struct of function pointers defines the API between smgr.c and * any individual storage manager module. 
Note that smgr subfunctions are *************** *** 504,509 **** --- 507,518 ---- smgrextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer, bool isTemp) { + /* Perform block checksumming for corruption detection */ + if (enable_block_checksums) + WritePageChecksum(buffer); + else + WriteInvalidPageChecksum(buffer); + (*(smgrsw[reln->smgr_which].smgr_extend)) (reln, forknum, blocknum, buffer, isTemp); } *************** *** 521,526 **** --- 530,557 ---- char *buffer) { (*(smgrsw[reln->smgr_which].smgr_read)) (reln, forknum, blocknum, buffer); + + /* Perform block checksumming for corruption detection */ + if (enable_block_checksums && !PageIsNew(buffer) && !InRecovery && + PageGetChecksum(buffer) != PAGE_INVALID_CHECKSUM) + { + PAGE_CHECKSUM_TYPE chksum; + + CalcPageChecksum(buffer, chksum); + + if (chksum != PageGetChecksum(buffer)) + { + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("invalid checksum on read of block %u of relation %u/%u/%u", + blocknum, + reln->smgr_rnode.spcNode, + reln->smgr_rnode.dbNode, + reln->smgr_rnode.relNode), + errdetail("Got %08x, expected %08x.", + chksum, PageGetChecksum(buffer)))); + } + } } /* *************** *** 542,547 **** --- 573,584 ---- smgrwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer, bool isTemp) { + /* Perform block checksumming for corruption detection */ + if (enable_block_checksums) + WritePageChecksum(buffer); + else + WriteInvalidPageChecksum(buffer); + (*(smgrsw[reln->smgr_which].smgr_write)) (reln, forknum, blocknum, buffer, isTemp); } Index: src/backend/utils/misc/guc.c =================================================================== RCS file: /home/alvherre/Code/cvs/pgsql/src/backend/utils/misc/guc.c,v retrieving revision 1.477 diff -c -p -r1.477 guc.c *** src/backend/utils/misc/guc.c 11 Nov 2008 02:42:32 -0000 1.477 --- src/backend/utils/misc/guc.c 13 Nov 2008 17:44:23 -0000 *************** *** 57,62 **** --- 57,63 ---- #include 
"regex/regex.h" #include "storage/bufmgr.h" #include "storage/fd.h" + #include "storage/smgr.h" #include "tcop/tcopprot.h" #include "tsearch/ts_cache.h" #include "utils/builtins.h" *************** *** 770,775 **** --- 771,786 ---- false, NULL, NULL }, { + {"perform_checksum", PGC_SIGHUP, UNGROUPED, + gettext_noop("Forces checksumming of blocks to/from disk."), + gettext_noop("The server will perform a checksum on the block " + "when read from or written to disk in order to detect storage-related " + "corruption.") + }, + &enable_block_checksums, + false, NULL, NULL + }, + { {"log_duration", PGC_SUSET, LOGGING_WHAT, gettext_noop("Logs the duration of each completed SQL statement."), NULL Index: src/backend/utils/misc/postgresql.conf.sample =================================================================== RCS file: /home/alvherre/Code/cvs/pgsql/src/backend/utils/misc/postgresql.conf.sample,v retrieving revision 1.247 diff -c -p -r1.247 postgresql.conf.sample *** src/backend/utils/misc/postgresql.conf.sample 9 Nov 2008 00:28:35 -0000 1.247 --- src/backend/utils/misc/postgresql.conf.sample 12 Nov 2008 13:14:17 -0000 *************** *** 481,486 **** --- 481,491 ---- #transform_null_equals = off + #------------------------------------------------------------------------------ + # CORRUPTION DETECTION + #------------------------------------------------------------------------------ + + #perform_checksum = off # Perform block checksumming to/from disk #------------------------------------------------------------------------------ # CUSTOMIZED OPTIONS Index: src/backend/utils/time/tqual.c =================================================================== RCS file: /home/alvherre/Code/cvs/pgsql/src/backend/utils/time/tqual.c,v retrieving revision 1.110 diff -c -p -r1.110 tqual.c *** src/backend/utils/time/tqual.c 26 Mar 2008 16:20:47 -0000 1.110 --- src/backend/utils/time/tqual.c 13 Nov 2008 17:44:23 -0000 *************** *** 44,49 **** --- 44,50 ---- #include 
"access/xact.h" #include "storage/bufmgr.h" #include "storage/procarray.h" + #include "storage/smgr.h" #include "utils/tqual.h" *************** *** 96,101 **** --- 97,104 ---- } tuple->t_infomask |= infomask; + if (enable_block_checksums) + PageSetUnloggedChange(BufferGetPage(buffer)); SetBufferCommitInfoNeedsSave(buffer); } Index: src/include/pg_config_manual.h =================================================================== RCS file: /home/alvherre/Code/cvs/pgsql/src/include/pg_config_manual.h,v retrieving revision 1.35 diff -c -p -r1.35 pg_config_manual.h Index: src/include/access/heapam.h =================================================================== RCS file: /home/alvherre/Code/cvs/pgsql/src/include/access/heapam.h,v retrieving revision 1.140 diff -c -p -r1.140 heapam.h *** src/include/access/heapam.h 6 Nov 2008 20:51:15 -0000 1.140 --- src/include/access/heapam.h 12 Nov 2008 13:14:17 -0000 *************** *** 140,145 **** --- 140,147 ---- OffsetNumber *offsets, int offcnt); extern XLogRecPtr log_newpage(RelFileNode *rnode, ForkNumber forkNum, BlockNumber blk, Page page); + extern XLogRecPtr log_hintbits(RelFileNode *rnode, ForkNumber forkNum, + BlockNumber blk, Page page); /* in heap/pruneheap.c */ extern void heap_page_prune_opt(Relation relation, Buffer buffer, Index: src/include/access/htup.h =================================================================== RCS file: /home/alvherre/Code/cvs/pgsql/src/include/access/htup.h,v retrieving revision 1.103 diff -c -p -r1.103 htup.h *** src/include/access/htup.h 2 Nov 2008 01:45:28 -0000 1.103 --- src/include/access/htup.h 12 Nov 2008 13:14:17 -0000 *************** *** 580,585 **** --- 580,586 ---- #define XLOG_HEAP2_FREEZE 0x00 #define XLOG_HEAP2_CLEAN 0x10 #define XLOG_HEAP2_CLEAN_MOVE 0x20 + #define XLOG_HEAP2_HINTBITS 0x30 /* * All what we need to find changed tuple *************** *** 714,719 **** --- 715,730 ---- #define SizeOfHeapFreeze (offsetof(xl_heap_freeze, cutoff_xid) + 
sizeof(TransactionId)) + /* This is what we need to know about hint bits */ + typedef struct xl_heap_hintbits + { + RelFileNode node; + BlockNumber block; + /* HINT BIT ARRAY FOLLOWS AT THE END */ + } xl_heap_hintbits; + + #define SizeOfHeapHintbits (offsetof(xl_heap_hintbits, block) + sizeof(BlockNumber)) + /* HeapTupleHeader functions implemented in utils/time/combocid.c */ extern CommandId HeapTupleHeaderGetCmin(HeapTupleHeader tup); extern CommandId HeapTupleHeaderGetCmax(HeapTupleHeader tup); Index: src/include/storage/bufpage.h =================================================================== RCS file: /home/alvherre/Code/cvs/pgsql/src/include/storage/bufpage.h,v retrieving revision 1.84 diff -c -p -r1.84 bufpage.h *** src/include/storage/bufpage.h 3 Nov 2008 20:47:49 -0000 1.84 --- src/include/storage/bufpage.h 13 Nov 2008 14:42:20 -0000 *************** *** 17,22 **** --- 17,23 ---- #include "access/xlogdefs.h" #include "storage/item.h" #include "storage/off.h" + #include "utils/pg_crc.h" /* * A postgres disk page is an abstraction layered on top of a postgres *************** *** 87,92 **** --- 88,94 ---- * * space management information generic to any page * + * pd_checksum - the checksum of the page * pd_lsn - identifies xlog record for last change to this page. * pd_tli - ditto. * pd_flags - flag bits. *************** *** 118,136 **** * the constraint on pagesize mod 256 is not an important restriction. * On the high end, we can only support pages up to 32KB because lp_off/lp_len * are 15 bits. */ typedef struct PageHeaderData { ! 
/* XXX LSN is member of *any* block, not only page-organized ones */ XLogRecPtr pd_lsn; /* LSN: next byte after last byte of xlog * record for last change to this page */ - uint16 pd_tli; /* least significant bits of the TimeLineID - * containing the LSN */ - uint16 pd_flags; /* flag bits, see below */ LocationIndex pd_lower; /* offset to start of free space */ LocationIndex pd_upper; /* offset to end of free space */ LocationIndex pd_special; /* offset to start of special space */ uint16 pd_pagesize_version; TransactionId pd_prune_xid; /* oldest prunable XID, or zero if none */ ItemIdData pd_linp[1]; /* beginning of line pointer array */ } PageHeaderData; --- 120,143 ---- * the constraint on pagesize mod 256 is not an important restriction. * On the high end, we can only support pages up to 32KB because lp_off/lp_len * are 15 bits. + * + * Note that pd_tli appears in a rather awkward position in the struct; + * this is because we moved it to accomodate pd_checksum without changing + * pg_pagesize_version's offset. */ typedef struct PageHeaderData { ! /* XXX CRC & LSN are members of *any* block, not only page-organized ones */ ! pg_crc32 pd_checksum; /* The block-level checksum */ XLogRecPtr pd_lsn; /* LSN: next byte after last byte of xlog * record for last change to this page */ LocationIndex pd_lower; /* offset to start of free space */ LocationIndex pd_upper; /* offset to end of free space */ LocationIndex pd_special; /* offset to start of special space */ uint16 pd_pagesize_version; + uint16 pd_tli; /* least significant bits of the TimeLineID + * containing the LSN */ + uint16 pd_flags; /* flag bits, see below */ TransactionId pd_prune_xid; /* oldest prunable XID, or zero if none */ ItemIdData pd_linp[1]; /* beginning of line pointer array */ } PageHeaderData; *************** *** 148,159 **** * PD_PAGE_FULL is set if an UPDATE doesn't find enough free space in the * page for its new tuple version; this suggests that a prune is needed. 
* Again, this is just a hint. */ #define PD_HAS_FREE_LINES 0x0001 /* are there any unused line pointers? */ #define PD_PAGE_FULL 0x0002 /* not enough free space for new * tuple? */ ! #define PD_VALID_FLAG_BITS 0x0003 /* OR of all valid pd_flags bits */ /* * Page layout version number 0 is for pre-7.3 Postgres releases. --- 155,172 ---- * PD_PAGE_FULL is set if an UPDATE doesn't find enough free space in the * page for its new tuple version; this suggests that a prune is needed. * Again, this is just a hint. + * + * PG_UNLOGGED_CHANGE indicates whether a process has set hint bits on the + * page. This is used to determine whether a WAL message needs to be emitted + * before writing the page to disk when page checksums are enabled. */ #define PD_HAS_FREE_LINES 0x0001 /* are there any unused line pointers? */ #define PD_PAGE_FULL 0x0002 /* not enough free space for new * tuple? */ + #define PD_UNLOGGED_CHANGE 0x0004 /* does the page have unlogged hint + bits? */ ! #define PD_VALID_FLAG_BITS 0x0007 /* OR of all valid pd_flags bits */ /* * Page layout version number 0 is for pre-7.3 Postgres releases. *************** *** 163,170 **** * Release 8.3 uses 4; it changed the HeapTupleHeader layout again, and * added the pd_flags field (by stealing some bits from pd_tli), * as well as adding the pd_prune_xid field (which enlarges the header). */ ! #define PG_PAGE_LAYOUT_VERSION 4 /* ---------------------------------------------------------------- --- 176,186 ---- * Release 8.3 uses 4; it changed the HeapTupleHeader layout again, and * added the pd_flags field (by stealing some bits from pd_tli), * as well as adding the pd_prune_xid field (which enlarges the header). + * Release 8.4 uses 5; it added a checksum to the page header, and moved + * pd_tli and pd_flags so that the page version would keep the same + * offset. */ ! 
#define PG_PAGE_LAYOUT_VERSION 5 /* ---------------------------------------------------------------- *************** *** 352,357 **** --- 368,432 ---- #define PageClearPrunable(page) \ (((PageHeader) (page))->pd_prune_xid = InvalidTransactionId) + /* ---------------------------------------------------------------- + * CRC support + * ---------------------------------------------------------------- + */ + #define PAGE_CHECKSUM_TYPE pg_crc32 + #define SIZEOF_PAGE_CHECKSUM sizeof(PAGE_CHECKSUM_TYPE) + #define PAGE_INVALID_CHECKSUM 0xb79a6e9c + + /* + * Given a page, calculate its checksum. + * + * We only include: the page header, the line pointers (except lp_flags), and + * the area between pd_upper and pd_special. The unused area is not included, + * and neither is the "special space". + */ + #define CalcPageChecksum(buffer, sum) \ + do { \ + int i; \ + INIT_CRC32(sum); \ + /* The page header, excluding pd_crc, pd_flags and pd_prune_xid */ \ + COMP_CRC32(sum, (char *) (buffer) + sizeof(pg_crc32), \ + offsetof(PageHeaderData, pd_flags)); \ + /* each line pointer, excluding lp_flags */ \ + for (i = 1; i <= PageGetMaxOffsetNumber(buffer); i++) \ + { \ + uint32 lpval; \ + lpval = *(uint32 *) PageGetItemId(buffer, i); \ + lpval &= ~ITEM_LP_FLAGS_MASK; \ + COMP_CRC32(sum, &lpval, sizeof(ItemIdData)); \ + } \ + /* the space occupied by tuples */ \ + COMP_CRC32(sum, (char *) (buffer) + ((PageHeader) (buffer))->pd_upper, \ + ((PageHeader) (buffer))->pd_special - ((PageHeader) (buffer))->pd_upper); \ + FIN_CRC32(sum); \ + } while (0) + + + /* beware multiple evaluation of argument */ + #define WritePageChecksum(buffer) \ + do { \ + PAGE_CHECKSUM_TYPE chksum; \ + CalcPageChecksum(buffer, chksum); \ + PageSetChecksum(buffer, chksum); \ + } while (0) + + #define WriteInvalidPageChecksum(buffer) \ + PageSetChecksum((buffer), PAGE_INVALID_CHECKSUM) + + #define PageGetChecksum(page) \ + (((PageHeader) (page))->pd_checksum) + #define PageSetChecksum(page, checksum) \ + 
(((PageHeader) (page))->pd_checksum = (checksum)) + + #define PageHasUnloggedChange(page) \ + (((PageHeader) (page))->pd_flags & PD_UNLOGGED_CHANGE) + #define PageSetUnloggedChange(page) \ + (((PageHeader) (page))->pd_flags |= PD_UNLOGGED_CHANGE) + #define PageClearUnloggedChange(page) \ + (((PageHeader) (page))->pd_flags &= ~PD_UNLOGGED_CHANGE) /* ---------------------------------------------------------------- * extern declarations Index: src/include/storage/itemid.h =================================================================== RCS file: /home/alvherre/Code/cvs/pgsql/src/include/storage/itemid.h,v retrieving revision 1.30 diff -c -p -r1.30 itemid.h *** src/include/storage/itemid.h 1 Jan 2008 19:45:59 -0000 1.30 --- src/include/storage/itemid.h 13 Nov 2008 12:41:07 -0000 *************** *** 39,44 **** --- 39,47 ---- #define LP_REDIRECT 2 /* HOT redirect (should have lp_len=0) */ #define LP_DEAD 3 /* dead, may or may not have storage */ + /* the bits used by lp_flags */ + #define ITEM_LP_FLAGS_MASK 0x00018000L + /* * Item offsets and lengths are represented by these types when * they're not actually stored in an ItemIdData. Index: src/include/storage/smgr.h =================================================================== RCS file: /home/alvherre/Code/cvs/pgsql/src/include/storage/smgr.h,v retrieving revision 1.63 diff -c -p -r1.63 smgr.h *** src/include/storage/smgr.h 11 Aug 2008 11:05:11 -0000 1.63 --- src/include/storage/smgr.h 12 Nov 2008 13:14:17 -0000 *************** *** 20,25 **** --- 20,28 ---- #include "storage/relfilenode.h" + /* Perform block checksumming for corruption detection */ + bool enable_block_checksums; + /* * smgr.c maintains a table of SMgrRelation objects, which are essentially * cached file handles. 
An SMgrRelation is created (if not already present) Index: src/include/utils/pg_crc.h =================================================================== RCS file: /home/alvherre/Code/cvs/pgsql/src/include/utils/pg_crc.h,v retrieving revision 1.18 diff -c -p -r1.18 pg_crc.h
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers