Than you for reviewing! On Thu, Jul 7, 2016 at 7:06 AM, Andres Freund <and...@anarazel.de> wrote: > On 2016-07-05 23:37:59 +0900, Masahiko Sawada wrote: >> diff --git a/src/backend/access/heap/heapam.c >> b/src/backend/access/heap/heapam.c >> index 57da57a..fd66527 100644 >> --- a/src/backend/access/heap/heapam.c >> +++ b/src/backend/access/heap/heapam.c >> @@ -3923,6 +3923,17 @@ l2: >> >> if (need_toast || newtupsize > pagefree) >> { >> + /* >> + * To prevent data corruption due to updating old tuple by >> + * other backends after released buffer > > That's not really the reason, is it? The prime problem is crash safety / > replication. The row-lock we're faking (by setting xmax to our xid), > prevents concurrent updates until this backend died.
Fixed. >> , we need to emit that >> + * xmax of old tuple is set and clear visibility map bits if >> + * needed before releasing buffer. We can reuse xl_heap_lock >> + * for this purpose. It should be fine even if we crash midway >> + * from this section and the actual updating one later, since >> + * the xmax will appear to come from an aborted xid. >> + */ >> + START_CRIT_SECTION(); >> + >> /* Clear obsolete visibility flags ... */ >> oldtup.t_data->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED); >> oldtup.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED; >> @@ -3936,6 +3947,46 @@ l2: >> /* temporarily make it look not-updated */ >> oldtup.t_data->t_ctid = oldtup.t_self; >> already_marked = true; >> + >> + /* Clear PD_ALL_VISIBLE flags */ >> + if (PageIsAllVisible(BufferGetPage(buffer))) >> + { >> + all_visible_cleared = true; >> + PageClearAllVisible(BufferGetPage(buffer)); >> + visibilitymap_clear(relation, >> BufferGetBlockNumber(buffer), >> + vmbuffer); >> + } >> + >> + MarkBufferDirty(buffer); >> + >> + if (RelationNeedsWAL(relation)) >> + { >> + xl_heap_lock xlrec; >> + XLogRecPtr recptr; >> + >> + /* >> + * For logical decoding we need combocids to properly >> decode the >> + * catalog. >> + */ >> + if (RelationIsAccessibleInLogicalDecoding(relation)) >> + log_heap_new_cid(relation, &oldtup); > > Hm, I don't see that being necessary here. Row locks aren't logically > decoded, so there's no need to emit this here. Fixed. > >> + /* Clear PD_ALL_VISIBLE flags */ >> + if (PageIsAllVisible(page)) >> + { >> + Buffer vmbuffer = InvalidBuffer; >> + BlockNumber block = BufferGetBlockNumber(*buffer); >> + >> + all_visible_cleared = true; >> + PageClearAllVisible(page); >> + visibilitymap_pin(relation, block, &vmbuffer); >> + visibilitymap_clear(relation, block, vmbuffer); >> + } >> + > > That clears all-visible unnecessarily, we only need to clear all-frozen. > Fixed. > >> @@ -8694,6 +8761,23 @@ heap_xlog_lock(XLogReaderState *record) >> } >> HeapTupleHeaderSetXmax(htup, xlrec->locking_xid); >> HeapTupleHeaderSetCmax(htup, FirstCommandId, false); >> + >> + /* The visibility map need to be cleared */ >> + if ((xlrec->infobits_set & XLHL_ALL_VISIBLE_CLEARED) != 0) >> + { >> + RelFileNode rnode; >> + Buffer vmbuffer = InvalidBuffer; >> + BlockNumber blkno; >> + Relation reln; >> + >> + XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno); >> + reln = CreateFakeRelcacheEntry(rnode); >> + >> + visibilitymap_pin(reln, blkno, &vmbuffer); >> + visibilitymap_clear(reln, blkno, vmbuffer); >> + PageClearAllVisible(page); >> + } >> + > > >> PageSetLSN(page, lsn); >> MarkBufferDirty(buffer); >> } >> diff --git a/src/include/access/heapam_xlog.h >> b/src/include/access/heapam_xlog.h >> index a822d0b..41b3c54 100644 >> --- a/src/include/access/heapam_xlog.h >> +++ b/src/include/access/heapam_xlog.h >> @@ -242,6 +242,7 @@ typedef struct xl_heap_cleanup_info >> #define XLHL_XMAX_EXCL_LOCK 0x04 >> #define XLHL_XMAX_KEYSHR_LOCK 0x08 >> #define XLHL_KEYS_UPDATED 0x10 >> +#define XLHL_ALL_VISIBLE_CLEARED 0x20 > > Hm. We can't easily do that in the back-patched version; because a > standby won't know to check for the flag . That's kinda ok, since we > don't yet need to clear all-visible yet at that point of > heap_update. But that better means we don't do so on the master either. > Attached latest version patch. I changed visibilitymap_clear function so that it allows to specify bits being cleared. The function that needs to clear the only all-frozen bit on visibility map calls visibilitymap_clear_extended function to clear particular bit. Other function can call visibilitymap_clear function to clear all bits for one page. Instead of adding XLHL_ALL_VISIBLE_CLEARED, we do vm loop up for back branches. To reduce unnecessary looking up visibility map, I changed it so that we check the PD_ALL_VISIBLE on heap page, and then look up all-frozen bit on visibility map if necessary. Regards, -- Masahiko Sawada
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 57da57a..e2efba3 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -3923,6 +3923,16 @@ l2: if (need_toast || newtupsize > pagefree) { + /* + * For crash safety, we need to emit that xmax of old tuple is set + * and clear only the all-frozen bit on visibility map if needed + * before releasing the buffer. We can reuse xl_heap_lock for this + * purpose. It should be fine even if we crash midway from this + * section and the actual updating one later, since the xmax will + * appear to come from an aborted xid. + */ + START_CRIT_SECTION(); + /* Clear obsolete visibility flags ... */ oldtup.t_data->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED); oldtup.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED; @@ -3936,6 +3946,36 @@ l2: /* temporarily make it look not-updated */ oldtup.t_data->t_ctid = oldtup.t_self; already_marked = true; + + /* Clear only the all-frozen bit on visibility map if needed */ + if (PageIsAllVisible(BufferGetPage(buffer)) && + VM_ALL_FROZEN(relation, block, &vmbuffer)) + { + visibilitymap_clear_extended(relation, block, vmbuffer, + VISIBILITYMAP_ALL_FROZEN); + } + + MarkBufferDirty(buffer); + + if (RelationNeedsWAL(relation)) + { + xl_heap_lock xlrec; + XLogRecPtr recptr; + + XLogBeginInsert(); + XLogRegisterBuffer(0, buffer, REGBUF_STANDARD); + + xlrec.offnum = ItemPointerGetOffsetNumber(&oldtup.t_self); + xlrec.locking_xid = xmax_old_tuple; + xlrec.infobits_set = compute_infobits(oldtup.t_data->t_infomask, + oldtup.t_data->t_infomask2); + XLogRegisterData((char *) &xlrec, SizeOfHeapLock); + recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_LOCK); + PageSetLSN(page, recptr); + } + + END_CRIT_SECTION(); + LockBuffer(buffer, BUFFER_LOCK_UNLOCK); /* @@ -4506,6 +4546,7 @@ heap_lock_tuple(Relation relation, HeapTuple tuple, ItemPointer tid = &(tuple->t_self); ItemId lp; Page page; + Buffer vmbuffer = InvalidBuffer; TransactionId xid, xmax; uint16 old_infomask, @@ -5034,6 +5075,17 @@ failed: if (HEAP_XMAX_IS_LOCKED_ONLY(new_infomask)) tuple->t_data->t_ctid = *tid; + /* Clear only the all-frozen bit on visibility map if needed */ + if (PageIsAllVisible(page) && + VM_ALL_FROZEN(relation, BufferGetBlockNumber(*buffer), &vmbuffer)) + { + BlockNumber block = BufferGetBlockNumber(*buffer); + + visibilitymap_pin(relation, block, &vmbuffer); + visibilitymap_clear_extended(relation, block, vmbuffer, + VISIBILITYMAP_ALL_FROZEN); + } + MarkBufferDirty(*buffer); /* @@ -5072,6 +5124,8 @@ failed: END_CRIT_SECTION(); LockBuffer(*buffer, BUFFER_LOCK_UNLOCK); + if (BufferIsValid(vmbuffer)) + ReleaseBuffer(vmbuffer); /* * Don't update the visibility map here. Locking a tuple doesn't change @@ -8694,6 +8748,22 @@ heap_xlog_lock(XLogReaderState *record) } HeapTupleHeaderSetXmax(htup, xlrec->locking_xid); HeapTupleHeaderSetCmax(htup, FirstCommandId, false); + + /* The all-frozen bit on visibility map need to be cleared if needed */ + if (PageIsAllVisible(BufferGetPage(buffer))) + { + RelFileNode rnode; + Buffer vmbuffer = InvalidBuffer; + BlockNumber blkno; + Relation reln; + + XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno); + reln = CreateFakeRelcacheEntry(rnode); + visibilitymap_clear_extended(reln, blkno, vmbuffer, + VISIBILITYMAP_ALL_FROZEN); + ReleaseBuffer(vmbuffer); + } + PageSetLSN(page, lsn); MarkBufferDirty(buffer); } diff --git a/src/backend/access/heap/visibilitymap.c b/src/backend/access/heap/visibilitymap.c index b472d31..c52c0b0 100644 --- a/src/backend/access/heap/visibilitymap.c +++ b/src/backend/access/heap/visibilitymap.c @@ -11,7 +11,7 @@ * src/backend/access/heap/visibilitymap.c * * INTERFACE ROUTINES - * visibilitymap_clear - clear a bit in the visibility map + * visibilitymap_clear - clear all bits for one page in the visibility map * visibilitymap_pin - pin a map page for setting a bit * visibilitymap_pin_ok - check whether correct map page is already pinned * visibilitymap_set - set a bit in a previously pinned page @@ -157,23 +157,34 @@ static const uint8 number_of_ones_for_frozen[256] = { static Buffer vm_readbuf(Relation rel, BlockNumber blkno, bool extend); static void vm_extend(Relation rel, BlockNumber nvmblocks); +/* + * A shorthand for visibilitymap_clear_extended, for clearing all bits for one + * page in visibility map with VISIBILITYMAP_VALID_BITS. + */ +void +visibilitymap_clear(Relation rel, BlockNumber heapBlk, Buffer buf) +{ + visibilitymap_clear_extended(rel, heapBlk, buf, VISIBILITYMAP_VALID_BITS); +} /* - * visibilitymap_clear - clear all bits for one page in visibility map + * visibilitymap_clear_extended - clear bit(s) for one page in visibility map * * You must pass a buffer containing the correct map page to this function. * Call visibilitymap_pin first to pin the right one. This function doesn't do * any I/O. */ void -visibilitymap_clear(Relation rel, BlockNumber heapBlk, Buffer buf) +visibilitymap_clear_extended(Relation rel, BlockNumber heapBlk, Buffer buf, uint8 flags) { BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk); int mapByte = HEAPBLK_TO_MAPBYTE(heapBlk); int mapOffset = HEAPBLK_TO_OFFSET(heapBlk); - uint8 mask = VISIBILITYMAP_VALID_BITS << mapOffset; + uint8 mask = flags << mapOffset; char *map; + Assert(flags & VISIBILITYMAP_VALID_BITS); + #ifdef TRACE_VISIBILITYMAP elog(DEBUG1, "vm_clear %s %d", RelationGetRelationName(rel), heapBlk); #endif diff --git a/src/include/access/visibilitymap.h b/src/include/access/visibilitymap.h index fca99ca..f305b03 100644 --- a/src/include/access/visibilitymap.h +++ b/src/include/access/visibilitymap.h @@ -36,6 +36,8 @@ extern void visibilitymap_clear(Relation rel, BlockNumber heapBlk, Buffer vmbuf); +extern void visibilitymap_clear_extended(Relation rel, BlockNumber heapBlk, + Buffer vmbuf, uint8 flags); extern void visibilitymap_pin(Relation rel, BlockNumber heapBlk, Buffer *vmbuf); extern bool visibilitymap_pin_ok(BlockNumber heapBlk, Buffer vmbuf);
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers