On 26/03/2019 15:59, Heikki Linnakangas wrote:
On 26/03/2019 11:29, Andrey Lepikhov wrote: On 25/03/2019 15:21, Heikki Linnakangas wrote: Hmm. When do we create all-zero pages during index build? That seems pretty surprising. GIST uses buffered pages. During a GIST build it is possible (very rarely) that no index tuple is written to a block's page before a new block is allocated, so the page remains an all-zero page. This cannot cause problems in the current GIST code, because it writes only changed pages to the WAL. Looking at the code, I don't see how that could happen. The only place where the GiST index file is extended is in gistNewBuffer(), and all callers of that initialize the page immediately after the call. What am I missing?
Sorry, this issue was found in the SP-GiST AM. You can reproduce it: 1. Apply the v2 version of the patch set (see attachment). 2. In the generic_log_relation() routine, add logging for the PageIsNew(buf) case. 3. Run the script t1.sql (in attachment). This problem can be resolved by calling MarkBufferDirty() after SpGistInitBuffer() in the allocNewBuffer() routine. But in this case we will write more pages to the WAL than necessary. To avoid that, in the patch '0001-Relation-into-WAL-function' I do not write new pages to the WAL.
The attached patch set is not the final version. It is needed only to demonstrate the 'all-zero pages' issue. The suggestion to use XLOG_FPI records directly will be considered in v3.
-- Andrey Lepikhov Postgres Professional https://postgrespro.com The Russian Postgres Company
t1.sql
Description: application/sql
>From d3093aa9a7628979b892d31449eda6228ef169ce Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" <a.lepik...@postgrespro.ru> Date: Mon, 1 Apr 2019 08:33:46 +0500 Subject: [PATCH 1/4] Relation-into-WAL-function --- src/backend/access/transam/generic_xlog.c | 48 +++++++++++++++++++++++ src/include/access/generic_xlog.h | 3 ++ 2 files changed, 51 insertions(+) diff --git a/src/backend/access/transam/generic_xlog.c b/src/backend/access/transam/generic_xlog.c index 5b00b7275b..c22e361747 100644 --- a/src/backend/access/transam/generic_xlog.c +++ b/src/backend/access/transam/generic_xlog.c @@ -542,3 +542,51 @@ generic_mask(char *page, BlockNumber blkno) mask_unused_space(page); } + +/* + * Function to write generic xlog for every existing block of a relation. + * Caller is responsible for locking the relation exclusively. + */ +void +generic_log_relation(Relation rel) +{ + BlockNumber blkno; + BlockNumber nblocks; + int npbuf = 0; + GenericXLogState *state = NULL; + Buffer bufpack[MAX_GENERIC_XLOG_PAGES]; + + CHECK_FOR_INTERRUPTS(); + nblocks = RelationGetNumberOfBlocks(rel); + + /* + * Iterate over all index pages and WAL-logging it. Pages are grouping into + * the packages before adding to a WAL-record. Zero-pages are + * not logged. 
+ */ + for (blkno = 0; blkno < nblocks; blkno++) + { + Buffer buf; + + buf = ReadBuffer(rel, blkno); + if (!PageIsNew(BufferGetPage(buf))) + { + if (npbuf == 0) + state = GenericXLogStart(rel); + + LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); + GenericXLogRegisterBuffer(state, buf, GENERIC_XLOG_FULL_IMAGE); + bufpack[npbuf++] = buf; + } + else + ReleaseBuffer(buf); + + if ((npbuf == MAX_GENERIC_XLOG_PAGES) || (blkno == nblocks-1)) + { + GenericXLogFinish(state); + + for (; npbuf > 0; npbuf--) + UnlockReleaseBuffer(bufpack[npbuf-1]); + } + } +} diff --git a/src/include/access/generic_xlog.h b/src/include/access/generic_xlog.h index cb5b5b713a..e3bbf014cc 100644 --- a/src/include/access/generic_xlog.h +++ b/src/include/access/generic_xlog.h @@ -42,4 +42,7 @@ extern const char *generic_identify(uint8 info); extern void generic_desc(StringInfo buf, XLogReaderState *record); extern void generic_mask(char *pagedata, BlockNumber blkno); +/* other utils */ +extern void generic_log_relation(Relation rel); + #endif /* GENERIC_XLOG_H */ -- 2.17.1
>From 9a0172346c8a942b6a493aca8c47452256a2932f Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" <a.lepik...@postgrespro.ru> Date: Mon, 1 Apr 2019 08:37:32 +0500 Subject: [PATCH 2/4] GIN-Optimal-WAL-Usage --- src/backend/access/gin/ginbtree.c | 6 ++--- src/backend/access/gin/gindatapage.c | 9 ++++---- src/backend/access/gin/ginentrypage.c | 2 +- src/backend/access/gin/gininsert.c | 30 ++++++++++-------------- src/backend/access/gin/ginutil.c | 4 ++-- src/backend/access/gin/ginvacuum.c | 2 +- src/backend/access/gin/ginxlog.c | 33 --------------------------- src/backend/access/rmgrdesc/gindesc.c | 6 ----- src/include/access/gin.h | 3 ++- src/include/access/ginxlog.h | 2 -- 10 files changed, 26 insertions(+), 71 deletions(-) diff --git a/src/backend/access/gin/ginbtree.c b/src/backend/access/gin/ginbtree.c index 533949e46a..9f82eef8c3 100644 --- a/src/backend/access/gin/ginbtree.c +++ b/src/backend/access/gin/ginbtree.c @@ -396,7 +396,7 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack, /* It will fit, perform the insertion */ START_CRIT_SECTION(); - if (RelationNeedsWAL(btree->index)) + if (RelationNeedsWAL(btree->index) && !btree->isBuild) { XLogBeginInsert(); XLogRegisterBuffer(0, stack->buffer, REGBUF_STANDARD); @@ -417,7 +417,7 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack, MarkBufferDirty(childbuf); } - if (RelationNeedsWAL(btree->index)) + if (RelationNeedsWAL(btree->index) && !btree->isBuild) { XLogRecPtr recptr; ginxlogInsert xlrec; @@ -595,7 +595,7 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack, } /* write WAL record */ - if (RelationNeedsWAL(btree->index)) + if (RelationNeedsWAL(btree->index) && !btree->isBuild) { XLogRecPtr recptr; diff --git a/src/backend/access/gin/gindatapage.c b/src/backend/access/gin/gindatapage.c index 3ad8b76710..f3aff62c8e 100644 --- a/src/backend/access/gin/gindatapage.c +++ b/src/backend/access/gin/gindatapage.c @@ -593,7 +593,7 @@ dataBeginPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack, 
* Great, all the items fit on a single page. If needed, prepare data * for a WAL record describing the changes we'll make. */ - if (RelationNeedsWAL(btree->index)) + if (RelationNeedsWAL(btree->index) && !btree->isBuild) computeLeafRecompressWALData(leaf); /* @@ -719,7 +719,7 @@ dataExecPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack, dataPlaceToPageLeafRecompress(buf, leaf); /* If needed, register WAL data built by computeLeafRecompressWALData */ - if (RelationNeedsWAL(btree->index)) + if (RelationNeedsWAL(btree->index) && !btree->isBuild) { XLogRegisterBufData(0, leaf->walinfo, leaf->walinfolen); } @@ -1152,7 +1152,7 @@ dataExecPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack, pitem = (PostingItem *) insertdata; GinDataPageAddPostingItem(page, pitem, off); - if (RelationNeedsWAL(btree->index)) + if (RelationNeedsWAL(btree->index) && !btree->isBuild) { /* * This must be static, because it has to survive until XLogInsert, @@ -1773,6 +1773,7 @@ createPostingTree(Relation index, ItemPointerData *items, uint32 nitems, Pointer ptr; int nrootitems; int rootsize; + bool is_build = (buildStats != NULL); /* Construct the new root page in memory first. 
*/ tmppage = (Page) palloc(BLCKSZ); @@ -1826,7 +1827,7 @@ createPostingTree(Relation index, ItemPointerData *items, uint32 nitems, PageRestoreTempPage(tmppage, page); MarkBufferDirty(buffer); - if (RelationNeedsWAL(index)) + if (RelationNeedsWAL(index) && !is_build) { XLogRecPtr recptr; ginxlogCreatePostingTree data; diff --git a/src/backend/access/gin/ginentrypage.c b/src/backend/access/gin/ginentrypage.c index 4889de2a4f..1f5ba33d51 100644 --- a/src/backend/access/gin/ginentrypage.c +++ b/src/backend/access/gin/ginentrypage.c @@ -571,7 +571,7 @@ entryExecPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack, elog(ERROR, "failed to add item to index page in \"%s\"", RelationGetRelationName(btree->index)); - if (RelationNeedsWAL(btree->index)) + if (RelationNeedsWAL(btree->index) && !btree->isBuild) { /* * This must be static, because it has to survive until XLogInsert, diff --git a/src/backend/access/gin/gininsert.c b/src/backend/access/gin/gininsert.c index b02f69b0dc..a15ca7f942 100644 --- a/src/backend/access/gin/gininsert.c +++ b/src/backend/access/gin/gininsert.c @@ -17,6 +17,7 @@ #include "access/gin_private.h" #include "access/ginxlog.h" #include "access/xloginsert.h" +#include "access/generic_xlog.h" #include "access/tableam.h" #include "catalog/index.h" #include "miscadmin.h" @@ -195,6 +196,7 @@ ginEntryInsert(GinState *ginstate, buildStats->nEntries++; ginPrepareEntryScan(&btree, attnum, key, category, ginstate); + btree.isBuild = (buildStats != NULL); stack = ginFindLeafPage(&btree, false, false, NULL); page = BufferGetPage(stack->buffer); @@ -347,23 +349,6 @@ ginbuild(Relation heap, Relation index, IndexInfo *indexInfo) GinInitBuffer(RootBuffer, GIN_LEAF); MarkBufferDirty(RootBuffer); - if (RelationNeedsWAL(index)) - { - XLogRecPtr recptr; - Page page; - - XLogBeginInsert(); - XLogRegisterBuffer(0, MetaBuffer, REGBUF_WILL_INIT | REGBUF_STANDARD); - XLogRegisterBuffer(1, RootBuffer, REGBUF_WILL_INIT); - - recptr = XLogInsert(RM_GIN_ID, 
XLOG_GIN_CREATE_INDEX); - - page = BufferGetPage(RootBuffer); - PageSetLSN(page, recptr); - - page = BufferGetPage(MetaBuffer); - PageSetLSN(page, recptr); - } UnlockReleaseBuffer(MetaBuffer); UnlockReleaseBuffer(RootBuffer); @@ -419,7 +404,16 @@ ginbuild(Relation heap, Relation index, IndexInfo *indexInfo) * Update metapage stats */ buildstate.buildStats.nTotalPages = RelationGetNumberOfBlocks(index); - ginUpdateStats(index, &buildstate.buildStats); + ginUpdateStats(index, &buildstate.buildStats, true); + + /* + * Create generic wal records for all pages of relation, if necessary. + * It seems reasonable not to generate WAL, if we recieved interrupt + * signal. + */ + CHECK_FOR_INTERRUPTS(); + if (RelationNeedsWAL(index)) + generic_log_relation(index); /* * Return statistics diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c index afc20232ac..51b20bca6e 100644 --- a/src/backend/access/gin/ginutil.c +++ b/src/backend/access/gin/ginutil.c @@ -661,7 +661,7 @@ ginGetStats(Relation index, GinStatsData *stats) * Note: nPendingPages and ginVersion are *not* copied over */ void -ginUpdateStats(Relation index, const GinStatsData *stats) +ginUpdateStats(Relation index, const GinStatsData *stats, bool is_build) { Buffer metabuffer; Page metapage; @@ -691,7 +691,7 @@ ginUpdateStats(Relation index, const GinStatsData *stats) MarkBufferDirty(metabuffer); - if (RelationNeedsWAL(index)) + if (RelationNeedsWAL(index) && !is_build) { XLogRecPtr recptr; ginxlogUpdateMeta data; diff --git a/src/backend/access/gin/ginvacuum.c b/src/backend/access/gin/ginvacuum.c index dfe885b101..b9a28d1863 100644 --- a/src/backend/access/gin/ginvacuum.c +++ b/src/backend/access/gin/ginvacuum.c @@ -759,7 +759,7 @@ ginvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats) /* Update the metapage with accurate page and entry counts */ idxStat.nTotalPages = npages; - ginUpdateStats(info->index, &idxStat); + ginUpdateStats(info->index, &idxStat, false); /* Finally, 
vacuum the FSM */ IndexFreeSpaceMapVacuum(info->index); diff --git a/src/backend/access/gin/ginxlog.c b/src/backend/access/gin/ginxlog.c index c467ffa346..b648af1ff6 100644 --- a/src/backend/access/gin/ginxlog.c +++ b/src/backend/access/gin/ginxlog.c @@ -40,36 +40,6 @@ ginRedoClearIncompleteSplit(XLogReaderState *record, uint8 block_id) UnlockReleaseBuffer(buffer); } -static void -ginRedoCreateIndex(XLogReaderState *record) -{ - XLogRecPtr lsn = record->EndRecPtr; - Buffer RootBuffer, - MetaBuffer; - Page page; - - MetaBuffer = XLogInitBufferForRedo(record, 0); - Assert(BufferGetBlockNumber(MetaBuffer) == GIN_METAPAGE_BLKNO); - page = (Page) BufferGetPage(MetaBuffer); - - GinInitMetabuffer(MetaBuffer); - - PageSetLSN(page, lsn); - MarkBufferDirty(MetaBuffer); - - RootBuffer = XLogInitBufferForRedo(record, 1); - Assert(BufferGetBlockNumber(RootBuffer) == GIN_ROOT_BLKNO); - page = (Page) BufferGetPage(RootBuffer); - - GinInitBuffer(RootBuffer, GIN_LEAF); - - PageSetLSN(page, lsn); - MarkBufferDirty(RootBuffer); - - UnlockReleaseBuffer(RootBuffer); - UnlockReleaseBuffer(MetaBuffer); -} - static void ginRedoCreatePTree(XLogReaderState *record) { @@ -767,9 +737,6 @@ gin_redo(XLogReaderState *record) oldCtx = MemoryContextSwitchTo(opCtx); switch (info) { - case XLOG_GIN_CREATE_INDEX: - ginRedoCreateIndex(record); - break; case XLOG_GIN_CREATE_PTREE: ginRedoCreatePTree(record); break; diff --git a/src/backend/access/rmgrdesc/gindesc.c b/src/backend/access/rmgrdesc/gindesc.c index ef30ce16b0..f3f4e1b214 100644 --- a/src/backend/access/rmgrdesc/gindesc.c +++ b/src/backend/access/rmgrdesc/gindesc.c @@ -78,9 +78,6 @@ gin_desc(StringInfo buf, XLogReaderState *record) switch (info) { - case XLOG_GIN_CREATE_INDEX: - /* no further information */ - break; case XLOG_GIN_CREATE_PTREE: /* no further information */ break; @@ -188,9 +185,6 @@ gin_identify(uint8 info) switch (info & ~XLR_INFO_MASK) { - case XLOG_GIN_CREATE_INDEX: - id = "CREATE_INDEX"; - break; case 
XLOG_GIN_CREATE_PTREE: id = "CREATE_PTREE"; break; diff --git a/src/include/access/gin.h b/src/include/access/gin.h index 61fa697039..d559ffc703 100644 --- a/src/include/access/gin.h +++ b/src/include/access/gin.h @@ -71,6 +71,7 @@ extern int gin_pending_list_limit; /* ginutil.c */ extern void ginGetStats(Relation index, GinStatsData *stats); -extern void ginUpdateStats(Relation index, const GinStatsData *stats); +extern void ginUpdateStats(Relation index, + const GinStatsData *stats, bool is_build); #endif /* GIN_H */ diff --git a/src/include/access/ginxlog.h b/src/include/access/ginxlog.h index 9bd4e0b9ba..2c5d743cac 100644 --- a/src/include/access/ginxlog.h +++ b/src/include/access/ginxlog.h @@ -16,8 +16,6 @@ #include "lib/stringinfo.h" #include "storage/off.h" -#define XLOG_GIN_CREATE_INDEX 0x00 - #define XLOG_GIN_CREATE_PTREE 0x10 typedef struct ginxlogCreatePostingTree -- 2.17.1
>From 3e73e862c1aa20bf4eeca20ba4381a1d3c6f19d9 Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" <a.lepik...@postgrespro.ru> Date: Mon, 1 Apr 2019 09:07:32 +0500 Subject: [PATCH 3/4] GiST-Optimal-WAL-Usage --- src/backend/access/gist/gist.c | 46 ++++++++++++++++++-------- src/backend/access/gist/gistbuild.c | 32 ++++++++++-------- src/backend/access/gist/gistutil.c | 2 +- src/backend/access/gist/gistxlog.c | 22 ------------ src/backend/access/rmgrdesc/gistdesc.c | 5 --- src/include/access/gist_private.h | 7 ++-- src/include/access/gistxlog.h | 1 - 7 files changed, 56 insertions(+), 59 deletions(-) diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c index 2fddb23496..0e2b6c3014 100644 --- a/src/backend/access/gist/gist.c +++ b/src/backend/access/gist/gist.c @@ -172,7 +172,7 @@ gistinsert(Relation r, Datum *values, bool *isnull, values, isnull, true /* size is currently bogus */ ); itup->t_tid = *ht_ctid; - gistdoinsert(r, itup, 0, giststate, heapRel); + gistdoinsert(r, itup, 0, giststate, heapRel, false); /* cleanup */ MemoryContextSwitchTo(oldCxt); @@ -219,7 +219,8 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate, Buffer leftchildbuf, List **splitinfo, bool markfollowright, - Relation heapRel) + Relation heapRel, + bool is_build) { BlockNumber blkno = BufferGetBlockNumber(buffer); Page page = BufferGetPage(buffer); @@ -458,7 +459,7 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate, * insertion for that. NB: The number of pages and data segments * specified here must match the calculations in gistXLogSplit()! 
*/ - if (RelationNeedsWAL(rel)) + if (RelationNeedsWAL(rel) && !is_build) XLogEnsureRecordSpace(npage, 1 + npage * 2); START_CRIT_SECTION(); @@ -479,18 +480,20 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate, PageRestoreTempPage(dist->page, BufferGetPage(dist->buffer)); dist->page = BufferGetPage(dist->buffer); - /* Write the WAL record */ - if (RelationNeedsWAL(rel)) + /* + * Write the WAL record. + * Do not write XLog entry if the insertion is caused by + * index build process. + */ + if (RelationNeedsWAL(rel) && !is_build) recptr = gistXLogSplit(is_leaf, - dist, oldrlink, oldnsn, leftchildbuf, - markfollowright); + dist, oldrlink, oldnsn, leftchildbuf, + markfollowright); else recptr = gistGetFakeLSN(rel); for (ptr = dist; ptr; ptr = ptr->next) - { PageSetLSN(ptr->page, recptr); - } /* * Return the new child buffers to the caller. @@ -544,7 +547,8 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate, if (BufferIsValid(leftchildbuf)) MarkBufferDirty(leftchildbuf); - if (RelationNeedsWAL(rel)) + + if (RelationNeedsWAL(rel) && !is_build) { OffsetNumber ndeloffs = 0, deloffs[1]; @@ -567,6 +571,7 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate, PageSetLSN(page, recptr); } + if (newblkno) *newblkno = blkno; } @@ -583,17 +588,28 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate, * the full page image. There's a chicken-and-egg problem: if we updated * the child pages first, we wouldn't know the recptr of the WAL record * we're about to write. + * + * We use fakeLSNs for inserions caused by index build. And when it is + * finished, we write generic_xlog entry for each index page and update + * all LSNs. In order to keep NSNs less then LSNs after this update, we + * set NSN to InvalidXLogRecPtr, which is the smallest possible NSN. 
*/ + if (BufferIsValid(leftchildbuf)) { Page leftpg = BufferGetPage(leftchildbuf); + XLogRecPtr fakerecptr = InvalidXLogRecPtr; - GistPageSetNSN(leftpg, recptr); - GistClearFollowRight(leftpg); + if (!is_build) + GistPageSetNSN(leftpg, recptr); + else + GistPageSetNSN(leftpg, fakerecptr); + GistClearFollowRight(leftpg); PageSetLSN(leftpg, recptr); } + END_CRIT_SECTION(); return is_split; @@ -606,7 +622,7 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate, */ void gistdoinsert(Relation r, IndexTuple itup, Size freespace, - GISTSTATE *giststate, Relation heapRel) + GISTSTATE *giststate, Relation heapRel, bool is_build) { ItemId iid; IndexTuple idxtuple; @@ -619,6 +635,7 @@ gistdoinsert(Relation r, IndexTuple itup, Size freespace, state.freespace = freespace; state.r = r; state.heapRel = heapRel; + state.is_build = is_build; /* Start from the root */ firststack.blkno = GIST_ROOT_BLKNO; @@ -1251,7 +1268,8 @@ gistinserttuples(GISTInsertState *state, GISTInsertStack *stack, leftchild, &splitinfo, true, - state->heapRel); + state->heapRel, + state->is_build); /* * Before recursing up in case the page was split, release locks on the diff --git a/src/backend/access/gist/gistbuild.c b/src/backend/access/gist/gistbuild.c index 3652fde5bb..8d0d285cab 100644 --- a/src/backend/access/gist/gistbuild.c +++ b/src/backend/access/gist/gistbuild.c @@ -17,6 +17,7 @@ #include <math.h> #include "access/genam.h" +#include "access/generic_xlog.h" #include "access/gist_private.h" #include "access/gistxlog.h" #include "access/tableam.h" @@ -181,18 +182,12 @@ gistbuild(Relation heap, Relation index, IndexInfo *indexInfo) MarkBufferDirty(buffer); - if (RelationNeedsWAL(index)) - { - XLogRecPtr recptr; - - XLogBeginInsert(); - XLogRegisterBuffer(0, buffer, REGBUF_WILL_INIT); - - recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_CREATE_INDEX); - PageSetLSN(page, recptr); - } - else - PageSetLSN(page, gistGetFakeLSN(heap)); + /* + * Do not write index pages to WAL unitl index build is 
finished. + * But we still need increasing LSNs on each page, so use FakeLSN, + * even for relations which eventually need WAL. + */ + PageSetLSN(page, gistGetFakeLSN(heap)); UnlockReleaseBuffer(buffer); @@ -226,6 +221,15 @@ gistbuild(Relation heap, Relation index, IndexInfo *indexInfo) freeGISTstate(buildstate.giststate); + /* + * Create generic wal records for all pages of relation, if necessary. + * It seems reasonable not to generate WAL, if we recieved interrupt + * signal. + */ + CHECK_FOR_INTERRUPTS(); + if (RelationNeedsWAL(index)) + generic_log_relation(index); + /* * Return statistics */ @@ -488,7 +492,7 @@ gistBuildCallback(Relation index, * locked, we call gistdoinsert directly. */ gistdoinsert(index, itup, buildstate->freespace, - buildstate->giststate, buildstate->heaprel); + buildstate->giststate, buildstate->heaprel, true); } /* Update tuple count and total size. */ @@ -695,7 +699,7 @@ gistbufferinginserttuples(GISTBuildState *buildstate, Buffer buffer, int level, InvalidBuffer, &splitinfo, false, - buildstate->heaprel); + buildstate->heaprel, true); /* * If this is a root split, update the root path item kept in memory. This diff --git a/src/backend/access/gist/gistutil.c b/src/backend/access/gist/gistutil.c index 2163cc482d..af278e5ded 100644 --- a/src/backend/access/gist/gistutil.c +++ b/src/backend/access/gist/gistutil.c @@ -1004,6 +1004,7 @@ gistproperty(Oid index_oid, int attno, * Temporary and unlogged GiST indexes are not WAL-logged, but we need LSNs * to detect concurrent page splits anyway. This function provides a fake * sequence of LSNs for that purpose. + * Persistent relations are also not WAL-logged while we build index. */ XLogRecPtr gistGetFakeLSN(Relation rel) @@ -1024,7 +1025,6 @@ gistGetFakeLSN(Relation rel) * Unlogged relations are accessible from other backends, and survive * (clean) restarts. GetFakeLSNForUnloggedRel() handles that for us. 
*/ - Assert(rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED); return GetFakeLSNForUnloggedRel(); } } diff --git a/src/backend/access/gist/gistxlog.c b/src/backend/access/gist/gistxlog.c index cb80ab00cd..4fb1855e89 100644 --- a/src/backend/access/gist/gistxlog.c +++ b/src/backend/access/gist/gistxlog.c @@ -490,25 +490,6 @@ gistRedoPageSplitRecord(XLogReaderState *record) UnlockReleaseBuffer(firstbuffer); } -static void -gistRedoCreateIndex(XLogReaderState *record) -{ - XLogRecPtr lsn = record->EndRecPtr; - Buffer buffer; - Page page; - - buffer = XLogInitBufferForRedo(record, 0); - Assert(BufferGetBlockNumber(buffer) == GIST_ROOT_BLKNO); - page = (Page) BufferGetPage(buffer); - - GISTInitBuffer(buffer, F_LEAF); - - PageSetLSN(page, lsn); - - MarkBufferDirty(buffer); - UnlockReleaseBuffer(buffer); -} - /* redo page deletion */ static void gistRedoPageDelete(XLogReaderState *record) @@ -594,9 +575,6 @@ gist_redo(XLogReaderState *record) case XLOG_GIST_PAGE_SPLIT: gistRedoPageSplitRecord(record); break; - case XLOG_GIST_CREATE_INDEX: - gistRedoCreateIndex(record); - break; case XLOG_GIST_PAGE_DELETE: gistRedoPageDelete(record); break; diff --git a/src/backend/access/rmgrdesc/gistdesc.c b/src/backend/access/rmgrdesc/gistdesc.c index 3ff4f83d38..eb308c72d6 100644 --- a/src/backend/access/rmgrdesc/gistdesc.c +++ b/src/backend/access/rmgrdesc/gistdesc.c @@ -71,8 +71,6 @@ gist_desc(StringInfo buf, XLogReaderState *record) case XLOG_GIST_PAGE_SPLIT: out_gistxlogPageSplit(buf, (gistxlogPageSplit *) rec); break; - case XLOG_GIST_CREATE_INDEX: - break; case XLOG_GIST_PAGE_DELETE: out_gistxlogPageDelete(buf, (gistxlogPageDelete *) rec); break; @@ -98,9 +96,6 @@ gist_identify(uint8 info) case XLOG_GIST_PAGE_SPLIT: id = "PAGE_SPLIT"; break; - case XLOG_GIST_CREATE_INDEX: - id = "CREATE_INDEX"; - break; case XLOG_GIST_PAGE_DELETE: id = "PAGE_DELETE"; break; diff --git a/src/include/access/gist_private.h b/src/include/access/gist_private.h index 02dc285a78..78e2e3fb31 100644 
--- a/src/include/access/gist_private.h +++ b/src/include/access/gist_private.h @@ -244,6 +244,7 @@ typedef struct Relation r; Relation heapRel; Size freespace; /* free space to be left */ + bool is_build; GISTInsertStack *stack; } GISTInsertState; @@ -393,7 +394,8 @@ extern void gistdoinsert(Relation r, IndexTuple itup, Size freespace, GISTSTATE *GISTstate, - Relation heapRel); + Relation heapRel, + bool is_build); /* A List of these is returned from gistplacetopage() in *splitinfo */ typedef struct @@ -409,7 +411,8 @@ extern bool gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate, Buffer leftchildbuf, List **splitinfo, bool markleftchild, - Relation heapRel); + Relation heapRel, + bool is_build); extern SplitedPageLayout *gistSplit(Relation r, Page page, IndexTuple *itup, int len, GISTSTATE *giststate); diff --git a/src/include/access/gistxlog.h b/src/include/access/gistxlog.h index 2f87b67a53..80931497ca 100644 --- a/src/include/access/gistxlog.h +++ b/src/include/access/gistxlog.h @@ -23,7 +23,6 @@ * FSM */ #define XLOG_GIST_PAGE_SPLIT 0x30 /* #define XLOG_GIST_INSERT_COMPLETE 0x40 */ /* not used anymore */ -#define XLOG_GIST_CREATE_INDEX 0x50 #define XLOG_GIST_PAGE_DELETE 0x60 /* -- 2.17.1
>From 19968269bc08f71ed1eaf4f33d1e06da6fda9708 Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" <a.lepik...@postgrespro.ru> Date: Mon, 1 Apr 2019 09:07:51 +0500 Subject: [PATCH 4/4] SP-GiST-Optimal-WAL-Usage --- src/backend/access/rmgrdesc/spgdesc.c | 5 ---- src/backend/access/spgist/spgdoinsert.c | 12 ++++----- src/backend/access/spgist/spginsert.c | 24 +++-------------- src/backend/access/spgist/spgxlog.c | 35 ------------------------- src/include/access/spgxlog.h | 1 - 5 files changed, 10 insertions(+), 67 deletions(-) diff --git a/src/backend/access/rmgrdesc/spgdesc.c b/src/backend/access/rmgrdesc/spgdesc.c index 37af31a764..40c1c8b3f9 100644 --- a/src/backend/access/rmgrdesc/spgdesc.c +++ b/src/backend/access/rmgrdesc/spgdesc.c @@ -24,8 +24,6 @@ spg_desc(StringInfo buf, XLogReaderState *record) switch (info) { - case XLOG_SPGIST_CREATE_INDEX: - break; case XLOG_SPGIST_ADD_LEAF: { spgxlogAddLeaf *xlrec = (spgxlogAddLeaf *) rec; @@ -88,9 +86,6 @@ spg_identify(uint8 info) switch (info & ~XLR_INFO_MASK) { - case XLOG_SPGIST_CREATE_INDEX: - id = "CREATE_INDEX"; - break; case XLOG_SPGIST_ADD_LEAF: id = "ADD_LEAF"; break; diff --git a/src/backend/access/spgist/spgdoinsert.c b/src/backend/access/spgist/spgdoinsert.c index 0d07b8b291..c34c44cd8b 100644 --- a/src/backend/access/spgist/spgdoinsert.c +++ b/src/backend/access/spgist/spgdoinsert.c @@ -289,7 +289,7 @@ addLeafTuple(Relation index, SpGistState *state, SpGistLeafTuple leafTuple, MarkBufferDirty(current->buffer); - if (RelationNeedsWAL(index)) + if (RelationNeedsWAL(index) && !state->isBuild) { XLogRecPtr recptr; int flags; @@ -516,7 +516,7 @@ moveLeafs(Relation index, SpGistState *state, MarkBufferDirty(current->buffer); MarkBufferDirty(nbuf); - if (RelationNeedsWAL(index)) + if (RelationNeedsWAL(index) && !state->isBuild) { XLogRecPtr recptr; @@ -1334,7 +1334,7 @@ doPickSplit(Relation index, SpGistState *state, saveCurrent.buffer = InvalidBuffer; } - if (RelationNeedsWAL(index)) + if 
(RelationNeedsWAL(index) && !state->isBuild) { XLogRecPtr recptr; int flags; @@ -1531,7 +1531,7 @@ spgAddNodeAction(Relation index, SpGistState *state, MarkBufferDirty(current->buffer); - if (RelationNeedsWAL(index)) + if (RelationNeedsWAL(index) && !state->isBuild) { XLogRecPtr recptr; @@ -1644,7 +1644,7 @@ spgAddNodeAction(Relation index, SpGistState *state, MarkBufferDirty(saveCurrent.buffer); - if (RelationNeedsWAL(index)) + if (RelationNeedsWAL(index) && !state->isBuild) { XLogRecPtr recptr; int flags; @@ -1840,7 +1840,7 @@ spgSplitNodeAction(Relation index, SpGistState *state, MarkBufferDirty(current->buffer); - if (RelationNeedsWAL(index)) + if (RelationNeedsWAL(index) && !state->isBuild) { XLogRecPtr recptr; diff --git a/src/backend/access/spgist/spginsert.c b/src/backend/access/spgist/spginsert.c index 390ad9ac51..6ec48b9f93 100644 --- a/src/backend/access/spgist/spginsert.c +++ b/src/backend/access/spgist/spginsert.c @@ -22,6 +22,7 @@ #include "access/tableam.h" #include "access/xlog.h" #include "access/xloginsert.h" +#include "access/generic_xlog.h" #include "catalog/index.h" #include "miscadmin.h" #include "storage/bufmgr.h" @@ -105,26 +106,6 @@ spgbuild(Relation heap, Relation index, IndexInfo *indexInfo) SpGistInitBuffer(nullbuffer, SPGIST_LEAF | SPGIST_NULLS); MarkBufferDirty(nullbuffer); - if (RelationNeedsWAL(index)) - { - XLogRecPtr recptr; - - XLogBeginInsert(); - - /* - * Replay will re-initialize the pages, so don't take full pages - * images. No other data to log. 
- */ - XLogRegisterBuffer(0, metabuffer, REGBUF_WILL_INIT | REGBUF_STANDARD); - XLogRegisterBuffer(1, rootbuffer, REGBUF_WILL_INIT | REGBUF_STANDARD); - XLogRegisterBuffer(2, nullbuffer, REGBUF_WILL_INIT | REGBUF_STANDARD); - - recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_CREATE_INDEX); - - PageSetLSN(BufferGetPage(metabuffer), recptr); - PageSetLSN(BufferGetPage(rootbuffer), recptr); - PageSetLSN(BufferGetPage(nullbuffer), recptr); - } END_CRIT_SECTION(); @@ -151,6 +132,9 @@ spgbuild(Relation heap, Relation index, IndexInfo *indexInfo) SpGistUpdateMetaPage(index); + if (RelationNeedsWAL(index)) + generic_log_relation(index); + result = (IndexBuildResult *) palloc0(sizeof(IndexBuildResult)); result->heap_tuples = reltuples; result->index_tuples = buildstate.indtuples; diff --git a/src/backend/access/spgist/spgxlog.c b/src/backend/access/spgist/spgxlog.c index 71836ee8a5..ebe6ae8715 100644 --- a/src/backend/access/spgist/spgxlog.c +++ b/src/backend/access/spgist/spgxlog.c @@ -72,38 +72,6 @@ addOrReplaceTuple(Page page, Item tuple, int size, OffsetNumber offset) size); } -static void -spgRedoCreateIndex(XLogReaderState *record) -{ - XLogRecPtr lsn = record->EndRecPtr; - Buffer buffer; - Page page; - - buffer = XLogInitBufferForRedo(record, 0); - Assert(BufferGetBlockNumber(buffer) == SPGIST_METAPAGE_BLKNO); - page = (Page) BufferGetPage(buffer); - SpGistInitMetapage(page); - PageSetLSN(page, lsn); - MarkBufferDirty(buffer); - UnlockReleaseBuffer(buffer); - - buffer = XLogInitBufferForRedo(record, 1); - Assert(BufferGetBlockNumber(buffer) == SPGIST_ROOT_BLKNO); - SpGistInitBuffer(buffer, SPGIST_LEAF); - page = (Page) BufferGetPage(buffer); - PageSetLSN(page, lsn); - MarkBufferDirty(buffer); - UnlockReleaseBuffer(buffer); - - buffer = XLogInitBufferForRedo(record, 2); - Assert(BufferGetBlockNumber(buffer) == SPGIST_NULL_BLKNO); - SpGistInitBuffer(buffer, SPGIST_LEAF | SPGIST_NULLS); - page = (Page) BufferGetPage(buffer); - PageSetLSN(page, lsn); - 
MarkBufferDirty(buffer); - UnlockReleaseBuffer(buffer); -} - static void spgRedoAddLeaf(XLogReaderState *record) { @@ -976,9 +944,6 @@ spg_redo(XLogReaderState *record) oldCxt = MemoryContextSwitchTo(opCtx); switch (info) { - case XLOG_SPGIST_CREATE_INDEX: - spgRedoCreateIndex(record); - break; case XLOG_SPGIST_ADD_LEAF: spgRedoAddLeaf(record); break; diff --git a/src/include/access/spgxlog.h b/src/include/access/spgxlog.h index 6527fc9eb1..8199b3f250 100644 --- a/src/include/access/spgxlog.h +++ b/src/include/access/spgxlog.h @@ -18,7 +18,6 @@ #include "storage/off.h" /* XLOG record types for SPGiST */ -#define XLOG_SPGIST_CREATE_INDEX 0x00 #define XLOG_SPGIST_ADD_LEAF 0x10 #define XLOG_SPGIST_MOVE_LEAFS 0x20 #define XLOG_SPGIST_ADD_NODE 0x30 -- 2.17.1