With Anastasia's consent, I will be improving this patch further.
The attachment contains the next version of the patch set.
11.07.2018 00:03, Heikki Linnakangas пишет:
On 28/02/18 18:03, Anastasia Lubennikova wrote:
Implementation is based on generic_xlog.
Why? I think we should just add a log_relation() function in
xloginsert.c directly, alongside log_newpage_buffer().
I have some arguments for keeping this functionality in the generic_xlog module:
1. xloginsert.c functions work at a low level of abstraction, operating on
buffers and pages.
2. Code using the generic_xlog service functions is more compact and
safer.
This makes the assumption that all the pages in these indexes used the
standard page layout. I think that's a valid assumption, but needs at
least a comment on that. And perhaps an Assert, to check that
pd_lower/upper look sane.
Done
As a further optimization, would it be a win to WAL-log multiple pages
in each record?
In this version of the patch we use a simple optimization: we pack
XLR_NORMAL_MAX_BLOCK_ID blocks into each WAL record.
This leaves the XLOG_*_CREATE_INDEX WAL record types unused, BTW.
Done
- Heikki
Benchmarks:
-----------
Test: pgbench -f gin-WAL-test.sql -t 5:
---------------------------------------
master:
Latency average: 27696.299 ms
WAL size: 2.66 GB
patched
Latency average: 22812.103 ms
WAL size: 1.23 GB
Test: pgbench -f gist-WAL-test.sql -t 5:
----------------------------------------
master:
Latency average: 19928.284 ms
WAL size: 1.25 GB
patched
Latency average: 18175.064 ms
WAL size: 0.63 GB
Test: pgbench -f spgist-WAL-test.sql -t 5:
------------------------------------------
master:
Latency average: 11529.384 ms
WAL size: 1.07 GB
patched
Latency average: 9330.828 ms
WAL size: 0.6 GB
--
Andrey Lepikhov
Postgres Professional
https://postgrespro.com
The Russian Postgres Company
>From db400ce9532536da36812dbf0456e756a0ea4724 Mon Sep 17 00:00:00 2001
From: "Andrey V. Lepikhov" <a.lepik...@postgrespro.ru>
Date: Tue, 31 Jul 2018 07:22:17 +0500
Subject: [PATCH 1/4] Relation-into-WAL-function
---
src/backend/access/transam/generic_xlog.c | 62 +++++++++++++++++++++++
src/include/access/generic_xlog.h | 3 ++
2 files changed, 65 insertions(+)
diff --git a/src/backend/access/transam/generic_xlog.c b/src/backend/access/transam/generic_xlog.c
index ce023548ae..8397b58ee7 100644
--- a/src/backend/access/transam/generic_xlog.c
+++ b/src/backend/access/transam/generic_xlog.c
@@ -80,6 +80,7 @@ static void computeRegionDelta(PageData *pageData,
static void computeDelta(PageData *pageData, Page curpage, Page targetpage);
static void applyPageRedo(Page page, const char *delta, Size deltaSize);
+static void standard_page_layout_check(Buffer buf);
/*
* Write next fragment into pageData's delta.
@@ -545,3 +546,64 @@ generic_mask(char *page, BlockNumber blkno)
mask_unused_space(page);
}
+
+/*
+ * Assert that the page in 'buf' has a sane standard layout (pd_lower <=
+ * pd_upper <= pd_special <= BLCKSZ).  Caller must hold a lock on the buffer.
+ */
+static void
+standard_page_layout_check(Buffer buf)
+{
+	PageHeader	ph PG_USED_FOR_ASSERTS_ONLY = (PageHeader) BufferGetPage(buf);
+
+	Assert((ph->pd_lower >= SizeOfPageHeaderData) &&
+		   (ph->pd_lower <= ph->pd_upper) &&
+		   (ph->pd_upper <= ph->pd_special) &&
+		   (ph->pd_special <= BLCKSZ) &&
+		   (ph->pd_special == MAXALIGN(ph->pd_special)));
+}
+
+/*
+ * Write generic WAL records containing a full-page image of every existing
+ * block of the relation, packing up to MAX_GENERIC_XLOG_PAGES blocks into
+ * each record.  All pages are assumed to use the standard page layout.
+ * Caller is responsible for locking the relation exclusively.
+ */
+void
+generic_log_relation(Relation rel)
+{
+	BlockNumber blkno;
+	BlockNumber nblocks = RelationGetNumberOfBlocks(rel);
+
+	elog(DEBUG2, "generic_log_relation '%s', nblocks %u BEGIN.",
+		 RelationGetRelationName(rel), nblocks);
+
+	for (blkno = 0; blkno < nblocks; )
+	{
+		GenericXLogState *state;
+		Buffer		buffer[MAX_GENERIC_XLOG_PAGES];
+		int			counter,
+					blocks_pack;
+
+		CHECK_FOR_INTERRUPTS();
+
+		/* The last record may carry fewer than a full pack of blocks */
+		blocks_pack = Min(nblocks - blkno, MAX_GENERIC_XLOG_PAGES);
+		state = GenericXLogStart(rel);
+
+		for (counter = 0; counter < blocks_pack; counter++)
+		{
+			buffer[counter] = ReadBuffer(rel, blkno++);
+			/* Lock first: the layout check requires a locked buffer */
+			LockBuffer(buffer[counter], BUFFER_LOCK_EXCLUSIVE);
+			standard_page_layout_check(buffer[counter]);
+			GenericXLogRegisterBuffer(state, buffer[counter], GENERIC_XLOG_FULL_IMAGE);
+		}
+
+		GenericXLogFinish(state);
+
+		for (counter = 0; counter < blocks_pack; counter++)
+			UnlockReleaseBuffer(buffer[counter]);
+	}
+	elog(DEBUG2, "generic_log_relation '%s' END.", RelationGetRelationName(rel));
+}
diff --git a/src/include/access/generic_xlog.h b/src/include/access/generic_xlog.h
index b23e1f684b..1f4b3b7030 100644
--- a/src/include/access/generic_xlog.h
+++ b/src/include/access/generic_xlog.h
@@ -42,4 +42,7 @@ extern const char *generic_identify(uint8 info);
extern void generic_desc(StringInfo buf, XLogReaderState *record);
extern void generic_mask(char *pagedata, BlockNumber blkno);
+/* other utils */
+extern void generic_log_relation(Relation rel);
+
#endif /* GENERIC_XLOG_H */
--
2.17.1
>From 8db76f41386e9b205fd16f856a823c9a3fabe6fc Mon Sep 17 00:00:00 2001
From: "Andrey V. Lepikhov" <a.lepik...@postgrespro.ru>
Date: Tue, 31 Jul 2018 07:22:46 +0500
Subject: [PATCH 2/4] GIN-Optimal-WAL-Usage
---
src/backend/access/gin/ginbtree.c | 6 ++---
src/backend/access/gin/gindatapage.c | 10 ++++----
src/backend/access/gin/ginentrypage.c | 2 +-
src/backend/access/gin/gininsert.c | 30 ++++++++++--------------
src/backend/access/gin/ginutil.c | 4 ++--
src/backend/access/gin/ginvacuum.c | 2 +-
src/backend/access/gin/ginxlog.c | 33 ---------------------------
src/backend/access/rmgrdesc/gindesc.c | 6 -----
src/include/access/gin.h | 3 ++-
src/include/access/ginxlog.h | 2 --
10 files changed, 27 insertions(+), 71 deletions(-)
diff --git a/src/backend/access/gin/ginbtree.c b/src/backend/access/gin/ginbtree.c
index 030d0f4418..b6d9f1bace 100644
--- a/src/backend/access/gin/ginbtree.c
+++ b/src/backend/access/gin/ginbtree.c
@@ -392,7 +392,7 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
/* It will fit, perform the insertion */
START_CRIT_SECTION();
- if (RelationNeedsWAL(btree->index))
+ if (RelationNeedsWAL(btree->index) && !btree->isBuild)
{
XLogBeginInsert();
XLogRegisterBuffer(0, stack->buffer, REGBUF_STANDARD);
@@ -413,7 +413,7 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
MarkBufferDirty(childbuf);
}
- if (RelationNeedsWAL(btree->index))
+ if (RelationNeedsWAL(btree->index) && !btree->isBuild)
{
XLogRecPtr recptr;
ginxlogInsert xlrec;
@@ -591,7 +591,7 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
}
/* write WAL record */
- if (RelationNeedsWAL(btree->index))
+ if (RelationNeedsWAL(btree->index) && !btree->isBuild)
{
XLogRecPtr recptr;
diff --git a/src/backend/access/gin/gindatapage.c b/src/backend/access/gin/gindatapage.c
index 9f20513811..7dd4284e26 100644
--- a/src/backend/access/gin/gindatapage.c
+++ b/src/backend/access/gin/gindatapage.c
@@ -593,7 +593,7 @@ dataBeginPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
* Great, all the items fit on a single page. If needed, prepare data
* for a WAL record describing the changes we'll make.
*/
- if (RelationNeedsWAL(btree->index))
+ if (RelationNeedsWAL(btree->index) && !btree->isBuild)
computeLeafRecompressWALData(leaf);
/*
@@ -630,6 +630,7 @@ dataBeginPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
* subsequent insertions will probably also go to the end. This packs
* the index somewhat tighter when appending to a table, which is very
* common.
+ *
*/
if (!btree->isBuild)
{
@@ -719,7 +720,7 @@ dataExecPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
dataPlaceToPageLeafRecompress(buf, leaf);
/* If needed, register WAL data built by computeLeafRecompressWALData */
- if (RelationNeedsWAL(btree->index))
+ if (RelationNeedsWAL(btree->index) && !btree->isBuild)
{
XLogRegisterBufData(0, leaf->walinfo, leaf->walinfolen);
}
@@ -1152,7 +1153,7 @@ dataExecPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack,
pitem = (PostingItem *) insertdata;
GinDataPageAddPostingItem(page, pitem, off);
- if (RelationNeedsWAL(btree->index))
+ if (RelationNeedsWAL(btree->index) && !btree->isBuild)
{
/*
* This must be static, because it has to survive until XLogInsert,
@@ -1773,6 +1774,7 @@ createPostingTree(Relation index, ItemPointerData *items, uint32 nitems,
Pointer ptr;
int nrootitems;
int rootsize;
+ bool is_build = (buildStats != NULL);
/* Construct the new root page in memory first. */
tmppage = (Page) palloc(BLCKSZ);
@@ -1826,7 +1828,7 @@ createPostingTree(Relation index, ItemPointerData *items, uint32 nitems,
PageRestoreTempPage(tmppage, page);
MarkBufferDirty(buffer);
- if (RelationNeedsWAL(index))
+ if (RelationNeedsWAL(index) && !is_build)
{
XLogRecPtr recptr;
ginxlogCreatePostingTree data;
diff --git a/src/backend/access/gin/ginentrypage.c b/src/backend/access/gin/ginentrypage.c
index 810769718f..b0fdb23e2b 100644
--- a/src/backend/access/gin/ginentrypage.c
+++ b/src/backend/access/gin/ginentrypage.c
@@ -571,7 +571,7 @@ entryExecPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack,
elog(ERROR, "failed to add item to index page in \"%s\"",
RelationGetRelationName(btree->index));
- if (RelationNeedsWAL(btree->index))
+ if (RelationNeedsWAL(btree->index) && !btree->isBuild)
{
/*
* This must be static, because it has to survive until XLogInsert,
diff --git a/src/backend/access/gin/gininsert.c b/src/backend/access/gin/gininsert.c
index 5281eb6823..980ab2232c 100644
--- a/src/backend/access/gin/gininsert.c
+++ b/src/backend/access/gin/gininsert.c
@@ -17,6 +17,7 @@
#include "access/gin_private.h"
#include "access/ginxlog.h"
#include "access/xloginsert.h"
+#include "access/generic_xlog.h"
#include "catalog/index.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"
@@ -194,6 +195,7 @@ ginEntryInsert(GinState *ginstate,
buildStats->nEntries++;
ginPrepareEntryScan(&btree, attnum, key, category, ginstate);
+ btree.isBuild = (buildStats != NULL);
stack = ginFindLeafPage(&btree, false, NULL);
page = BufferGetPage(stack->buffer);
@@ -346,23 +348,6 @@ ginbuild(Relation heap, Relation index, IndexInfo *indexInfo)
GinInitBuffer(RootBuffer, GIN_LEAF);
MarkBufferDirty(RootBuffer);
- if (RelationNeedsWAL(index))
- {
- XLogRecPtr recptr;
- Page page;
-
- XLogBeginInsert();
- XLogRegisterBuffer(0, MetaBuffer, REGBUF_WILL_INIT | REGBUF_STANDARD);
- XLogRegisterBuffer(1, RootBuffer, REGBUF_WILL_INIT);
-
- recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_INDEX);
-
- page = BufferGetPage(RootBuffer);
- PageSetLSN(page, recptr);
-
- page = BufferGetPage(MetaBuffer);
- PageSetLSN(page, recptr);
- }
UnlockReleaseBuffer(MetaBuffer);
UnlockReleaseBuffer(RootBuffer);
@@ -417,7 +402,16 @@ ginbuild(Relation heap, Relation index, IndexInfo *indexInfo)
* Update metapage stats
*/
buildstate.buildStats.nTotalPages = RelationGetNumberOfBlocks(index);
- ginUpdateStats(index, &buildstate.buildStats);
+ ginUpdateStats(index, &buildstate.buildStats, true);
+
+ /*
+ * Create generic wal records for all pages of relation, if necessary.
+ * It seems reasonable not to generate WAL if we have received an interrupt
+ * signal.
+ */
+ CHECK_FOR_INTERRUPTS();
+ if (RelationNeedsWAL(index))
+ generic_log_relation(index);
/*
* Return statistics
diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c
index 0a32182dd7..37d3e89acb 100644
--- a/src/backend/access/gin/ginutil.c
+++ b/src/backend/access/gin/ginutil.c
@@ -666,7 +666,7 @@ ginGetStats(Relation index, GinStatsData *stats)
* Note: nPendingPages and ginVersion are *not* copied over
*/
void
-ginUpdateStats(Relation index, const GinStatsData *stats)
+ginUpdateStats(Relation index, const GinStatsData *stats, bool is_build)
{
Buffer metabuffer;
Page metapage;
@@ -696,7 +696,7 @@ ginUpdateStats(Relation index, const GinStatsData *stats)
MarkBufferDirty(metabuffer);
- if (RelationNeedsWAL(index))
+ if (RelationNeedsWAL(index) && !is_build)
{
XLogRecPtr recptr;
ginxlogUpdateMeta data;
diff --git a/src/backend/access/gin/ginvacuum.c b/src/backend/access/gin/ginvacuum.c
index 3104bc12b6..1d8626fd5e 100644
--- a/src/backend/access/gin/ginvacuum.c
+++ b/src/backend/access/gin/ginvacuum.c
@@ -765,7 +765,7 @@ ginvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
/* Update the metapage with accurate page and entry counts */
idxStat.nTotalPages = npages;
- ginUpdateStats(info->index, &idxStat);
+ ginUpdateStats(info->index, &idxStat, false);
/* Finally, vacuum the FSM */
IndexFreeSpaceMapVacuum(info->index);
diff --git a/src/backend/access/gin/ginxlog.c b/src/backend/access/gin/ginxlog.c
index 7a1e94a1d5..13a78a2a3b 100644
--- a/src/backend/access/gin/ginxlog.c
+++ b/src/backend/access/gin/ginxlog.c
@@ -40,36 +40,6 @@ ginRedoClearIncompleteSplit(XLogReaderState *record, uint8 block_id)
UnlockReleaseBuffer(buffer);
}
-static void
-ginRedoCreateIndex(XLogReaderState *record)
-{
- XLogRecPtr lsn = record->EndRecPtr;
- Buffer RootBuffer,
- MetaBuffer;
- Page page;
-
- MetaBuffer = XLogInitBufferForRedo(record, 0);
- Assert(BufferGetBlockNumber(MetaBuffer) == GIN_METAPAGE_BLKNO);
- page = (Page) BufferGetPage(MetaBuffer);
-
- GinInitMetabuffer(MetaBuffer);
-
- PageSetLSN(page, lsn);
- MarkBufferDirty(MetaBuffer);
-
- RootBuffer = XLogInitBufferForRedo(record, 1);
- Assert(BufferGetBlockNumber(RootBuffer) == GIN_ROOT_BLKNO);
- page = (Page) BufferGetPage(RootBuffer);
-
- GinInitBuffer(RootBuffer, GIN_LEAF);
-
- PageSetLSN(page, lsn);
- MarkBufferDirty(RootBuffer);
-
- UnlockReleaseBuffer(RootBuffer);
- UnlockReleaseBuffer(MetaBuffer);
-}
-
static void
ginRedoCreatePTree(XLogReaderState *record)
{
@@ -724,9 +694,6 @@ gin_redo(XLogReaderState *record)
oldCtx = MemoryContextSwitchTo(opCtx);
switch (info)
{
- case XLOG_GIN_CREATE_INDEX:
- ginRedoCreateIndex(record);
- break;
case XLOG_GIN_CREATE_PTREE:
ginRedoCreatePTree(record);
break;
diff --git a/src/backend/access/rmgrdesc/gindesc.c b/src/backend/access/rmgrdesc/gindesc.c
index 3456187e3d..1bb1733666 100644
--- a/src/backend/access/rmgrdesc/gindesc.c
+++ b/src/backend/access/rmgrdesc/gindesc.c
@@ -78,9 +78,6 @@ gin_desc(StringInfo buf, XLogReaderState *record)
switch (info)
{
- case XLOG_GIN_CREATE_INDEX:
- /* no further information */
- break;
case XLOG_GIN_CREATE_PTREE:
/* no further information */
break;
@@ -188,9 +185,6 @@ gin_identify(uint8 info)
switch (info & ~XLR_INFO_MASK)
{
- case XLOG_GIN_CREATE_INDEX:
- id = "CREATE_INDEX";
- break;
case XLOG_GIN_CREATE_PTREE:
id = "CREATE_PTREE";
break;
diff --git a/src/include/access/gin.h b/src/include/access/gin.h
index 3d8a130b69..0fa33f5a19 100644
--- a/src/include/access/gin.h
+++ b/src/include/access/gin.h
@@ -71,6 +71,7 @@ extern int gin_pending_list_limit;
/* ginutil.c */
extern void ginGetStats(Relation index, GinStatsData *stats);
-extern void ginUpdateStats(Relation index, const GinStatsData *stats);
+extern void ginUpdateStats(Relation index,
+ const GinStatsData *stats, bool is_build);
#endif /* GIN_H */
diff --git a/src/include/access/ginxlog.h b/src/include/access/ginxlog.h
index 64a3c9e18b..5c74973444 100644
--- a/src/include/access/ginxlog.h
+++ b/src/include/access/ginxlog.h
@@ -16,8 +16,6 @@
#include "lib/stringinfo.h"
#include "storage/off.h"
-#define XLOG_GIN_CREATE_INDEX 0x00
-
#define XLOG_GIN_CREATE_PTREE 0x10
typedef struct ginxlogCreatePostingTree
--
2.17.1
>From 49a5fc72492f4917990bfeb7bd6c4fd06e52a7d5 Mon Sep 17 00:00:00 2001
From: "Andrey V. Lepikhov" <a.lepik...@postgrespro.ru>
Date: Tue, 31 Jul 2018 07:23:11 +0500
Subject: [PATCH 3/4] GIST-Optimal-WAL-Usage
---
src/backend/access/gist/gist.c | 46 ++++++++++++++++++--------
src/backend/access/gist/gistbuild.c | 32 ++++++++++--------
src/backend/access/gist/gistutil.c | 2 +-
src/backend/access/gist/gistxlog.c | 22 ------------
src/backend/access/rmgrdesc/gistdesc.c | 5 ---
src/include/access/gist_private.h | 10 +++---
src/include/access/gistxlog.h | 1 -
7 files changed, 57 insertions(+), 61 deletions(-)
diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c
index 8a42effdf7..227998b1f8 100644
--- a/src/backend/access/gist/gist.c
+++ b/src/backend/access/gist/gist.c
@@ -172,7 +172,7 @@ gistinsert(Relation r, Datum *values, bool *isnull,
values, isnull, true /* size is currently bogus */ );
itup->t_tid = *ht_ctid;
- gistdoinsert(r, itup, 0, giststate);
+ gistdoinsert(r, itup, 0, giststate, false);
/* cleanup */
MemoryContextSwitchTo(oldCxt);
@@ -218,7 +218,8 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
BlockNumber *newblkno,
Buffer leftchildbuf,
List **splitinfo,
- bool markfollowright)
+ bool markfollowright,
+ bool is_build)
{
BlockNumber blkno = BufferGetBlockNumber(buffer);
Page page = BufferGetPage(buffer);
@@ -457,7 +458,7 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
* insertion for that. NB: The number of pages and data segments
* specified here must match the calculations in gistXLogSplit()!
*/
- if (RelationNeedsWAL(rel))
+ if (RelationNeedsWAL(rel) && !is_build)
XLogEnsureRecordSpace(npage, 1 + npage * 2);
START_CRIT_SECTION();
@@ -478,18 +479,20 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
PageRestoreTempPage(dist->page, BufferGetPage(dist->buffer));
dist->page = BufferGetPage(dist->buffer);
- /* Write the WAL record */
- if (RelationNeedsWAL(rel))
+ /*
+ * Write the WAL record.
+ * Do not write XLog entry if the insertion is caused by
+ * index build process.
+ */
+ if (RelationNeedsWAL(rel) && !is_build)
recptr = gistXLogSplit(is_leaf,
- dist, oldrlink, oldnsn, leftchildbuf,
- markfollowright);
+ dist, oldrlink, oldnsn, leftchildbuf,
+ markfollowright);
else
recptr = gistGetFakeLSN(rel);
for (ptr = dist; ptr; ptr = ptr->next)
- {
PageSetLSN(ptr->page, recptr);
- }
/*
* Return the new child buffers to the caller.
@@ -543,7 +546,8 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
if (BufferIsValid(leftchildbuf))
MarkBufferDirty(leftchildbuf);
- if (RelationNeedsWAL(rel))
+
+ if (RelationNeedsWAL(rel) && !is_build)
{
OffsetNumber ndeloffs = 0,
deloffs[1];
@@ -566,6 +570,7 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
PageSetLSN(page, recptr);
}
+
if (newblkno)
*newblkno = blkno;
}
@@ -582,17 +587,28 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
* the full page image. There's a chicken-and-egg problem: if we updated
* the child pages first, we wouldn't know the recptr of the WAL record
* we're about to write.
+ *
+ * We use fakeLSNs for insertions caused by index build. And when it is
+ * finished, we write generic_xlog entry for each index page and update
+ * all LSNs. In order to keep NSNs less than LSNs after this update, we
+ * set NSN to InvalidXLogRecPtr, which is the smallest possible NSN.
*/
+
if (BufferIsValid(leftchildbuf))
{
Page leftpg = BufferGetPage(leftchildbuf);
+ XLogRecPtr fakerecptr = InvalidXLogRecPtr;
- GistPageSetNSN(leftpg, recptr);
- GistClearFollowRight(leftpg);
+ if (!is_build)
+ GistPageSetNSN(leftpg, recptr);
+ else
+ GistPageSetNSN(leftpg, fakerecptr);
+ GistClearFollowRight(leftpg);
PageSetLSN(leftpg, recptr);
}
+
END_CRIT_SECTION();
return is_split;
@@ -604,7 +620,8 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
* so it does not bother releasing palloc'd allocations.
*/
void
-gistdoinsert(Relation r, IndexTuple itup, Size freespace, GISTSTATE *giststate)
+gistdoinsert(Relation r, IndexTuple itup, Size freespace,
+ GISTSTATE *giststate, bool is_build)
{
ItemId iid;
IndexTuple idxtuple;
@@ -616,6 +633,7 @@ gistdoinsert(Relation r, IndexTuple itup, Size freespace, GISTSTATE *giststate)
memset(&state, 0, sizeof(GISTInsertState));
state.freespace = freespace;
state.r = r;
+ state.is_build = is_build;
/* Start from the root */
firststack.blkno = GIST_ROOT_BLKNO;
@@ -1232,7 +1250,7 @@ gistinserttuples(GISTInsertState *state, GISTInsertStack *stack,
oldoffnum, NULL,
leftchild,
&splitinfo,
- true);
+ true, state->is_build);
/*
* Before recursing up in case the page was split, release locks on the
diff --git a/src/backend/access/gist/gistbuild.c b/src/backend/access/gist/gistbuild.c
index 434f15f014..b61dbf8ac3 100644
--- a/src/backend/access/gist/gistbuild.c
+++ b/src/backend/access/gist/gistbuild.c
@@ -20,6 +20,7 @@
#include "access/gist_private.h"
#include "access/gistxlog.h"
#include "access/xloginsert.h"
+#include "access/generic_xlog.h"
#include "catalog/index.h"
#include "miscadmin.h"
#include "optimizer/cost.h"
@@ -178,18 +179,12 @@ gistbuild(Relation heap, Relation index, IndexInfo *indexInfo)
MarkBufferDirty(buffer);
- if (RelationNeedsWAL(index))
- {
- XLogRecPtr recptr;
-
- XLogBeginInsert();
- XLogRegisterBuffer(0, buffer, REGBUF_WILL_INIT);
-
- recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_CREATE_INDEX);
- PageSetLSN(page, recptr);
- }
- else
- PageSetLSN(page, gistGetFakeLSN(heap));
+ /*
+ * Do not write index pages to WAL until index build is finished.
+ * But we still need increasing LSNs on each page, so use FakeLSN,
+ * even for relations which eventually need WAL.
+ */
+ PageSetLSN(page, gistGetFakeLSN(heap));
UnlockReleaseBuffer(buffer);
@@ -222,6 +217,15 @@ gistbuild(Relation heap, Relation index, IndexInfo *indexInfo)
freeGISTstate(buildstate.giststate);
+ /*
+ * Create generic wal records for all pages of relation, if necessary.
+ * It seems reasonable not to generate WAL if we have received an interrupt
+ * signal.
+ */
+ CHECK_FOR_INTERRUPTS();
+ if (RelationNeedsWAL(index))
+ generic_log_relation(index);
+
/*
* Return statistics
*/
@@ -484,7 +488,7 @@ gistBuildCallback(Relation index,
* locked, we call gistdoinsert directly.
*/
gistdoinsert(index, itup, buildstate->freespace,
- buildstate->giststate);
+ buildstate->giststate, true);
}
/* Update tuple count and total size. */
@@ -690,7 +694,7 @@ gistbufferinginserttuples(GISTBuildState *buildstate, Buffer buffer, int level,
itup, ntup, oldoffnum, &placed_to_blk,
InvalidBuffer,
&splitinfo,
- false);
+ false, true);
/*
* If this is a root split, update the root path item kept in memory. This
diff --git a/src/backend/access/gist/gistutil.c b/src/backend/access/gist/gistutil.c
index 12804c321c..0b3f8db679 100644
--- a/src/backend/access/gist/gistutil.c
+++ b/src/backend/access/gist/gistutil.c
@@ -974,6 +974,7 @@ gistproperty(Oid index_oid, int attno,
* Temporary and unlogged GiST indexes are not WAL-logged, but we need LSNs
* to detect concurrent page splits anyway. This function provides a fake
* sequence of LSNs for that purpose.
+ * Persistent relations are also not WAL-logged while we build index.
*/
XLogRecPtr
gistGetFakeLSN(Relation rel)
@@ -994,7 +995,6 @@ gistGetFakeLSN(Relation rel)
* Unlogged relations are accessible from other backends, and survive
* (clean) restarts. GetFakeLSNForUnloggedRel() handles that for us.
*/
- Assert(rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED);
return GetFakeLSNForUnloggedRel();
}
}
diff --git a/src/backend/access/gist/gistxlog.c b/src/backend/access/gist/gistxlog.c
index 1e09126978..9b0abccdaf 100644
--- a/src/backend/access/gist/gistxlog.c
+++ b/src/backend/access/gist/gistxlog.c
@@ -281,25 +281,6 @@ gistRedoPageSplitRecord(XLogReaderState *record)
UnlockReleaseBuffer(firstbuffer);
}
-static void
-gistRedoCreateIndex(XLogReaderState *record)
-{
- XLogRecPtr lsn = record->EndRecPtr;
- Buffer buffer;
- Page page;
-
- buffer = XLogInitBufferForRedo(record, 0);
- Assert(BufferGetBlockNumber(buffer) == GIST_ROOT_BLKNO);
- page = (Page) BufferGetPage(buffer);
-
- GISTInitBuffer(buffer, F_LEAF);
-
- PageSetLSN(page, lsn);
-
- MarkBufferDirty(buffer);
- UnlockReleaseBuffer(buffer);
-}
-
void
gist_redo(XLogReaderState *record)
{
@@ -321,9 +302,6 @@ gist_redo(XLogReaderState *record)
case XLOG_GIST_PAGE_SPLIT:
gistRedoPageSplitRecord(record);
break;
- case XLOG_GIST_CREATE_INDEX:
- gistRedoCreateIndex(record);
- break;
default:
elog(PANIC, "gist_redo: unknown op code %u", info);
}
diff --git a/src/backend/access/rmgrdesc/gistdesc.c b/src/backend/access/rmgrdesc/gistdesc.c
index e5e925e0c5..1685a2fb8a 100644
--- a/src/backend/access/rmgrdesc/gistdesc.c
+++ b/src/backend/access/rmgrdesc/gistdesc.c
@@ -44,8 +44,6 @@ gist_desc(StringInfo buf, XLogReaderState *record)
case XLOG_GIST_PAGE_SPLIT:
out_gistxlogPageSplit(buf, (gistxlogPageSplit *) rec);
break;
- case XLOG_GIST_CREATE_INDEX:
- break;
}
}
@@ -62,9 +60,6 @@ gist_identify(uint8 info)
case XLOG_GIST_PAGE_SPLIT:
id = "PAGE_SPLIT";
break;
- case XLOG_GIST_CREATE_INDEX:
- id = "CREATE_INDEX";
- break;
}
return id;
diff --git a/src/include/access/gist_private.h b/src/include/access/gist_private.h
index 36ed7244ba..0588fc716a 100644
--- a/src/include/access/gist_private.h
+++ b/src/include/access/gist_private.h
@@ -241,6 +241,7 @@ typedef struct
{
Relation r;
Size freespace; /* free space to be left */
+ bool is_build;
GISTInsertStack *stack;
} GISTInsertState;
@@ -387,9 +388,9 @@ extern MemoryContext createTempGistContext(void);
extern GISTSTATE *initGISTstate(Relation index);
extern void freeGISTstate(GISTSTATE *giststate);
extern void gistdoinsert(Relation r,
- IndexTuple itup,
- Size freespace,
- GISTSTATE *GISTstate);
+ IndexTuple itup,
+ Size freespace,
+ GISTSTATE* giststate, bool is_build);
/* A List of these is returned from gistplacetopage() in *splitinfo */
typedef struct
@@ -404,7 +405,8 @@ extern bool gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
OffsetNumber oldoffnum, BlockNumber *newblkno,
Buffer leftchildbuf,
List **splitinfo,
- bool markleftchild);
+ bool markleftchild,
+ bool is_build);
extern SplitedPageLayout *gistSplit(Relation r, Page page, IndexTuple *itup,
int len, GISTSTATE *giststate);
diff --git a/src/include/access/gistxlog.h b/src/include/access/gistxlog.h
index 1a2b9496d0..5932fc395b 100644
--- a/src/include/access/gistxlog.h
+++ b/src/include/access/gistxlog.h
@@ -21,7 +21,6 @@
/* #define XLOG_GIST_NEW_ROOT 0x20 */ /* not used anymore */
#define XLOG_GIST_PAGE_SPLIT 0x30
/* #define XLOG_GIST_INSERT_COMPLETE 0x40 */ /* not used anymore */
-#define XLOG_GIST_CREATE_INDEX 0x50
/* #define XLOG_GIST_PAGE_DELETE 0x60 */ /* not used anymore */
/*
--
2.17.1
>From 41ec17abe5a60b5dedee28aa940c8556bc0e402f Mon Sep 17 00:00:00 2001
From: "Andrey V. Lepikhov" <a.lepik...@postgrespro.ru>
Date: Tue, 31 Jul 2018 07:23:36 +0500
Subject: [PATCH 4/4] SPGIST-Optimal-WAL-Usage
---
src/backend/access/rmgrdesc/spgdesc.c | 5 ----
src/backend/access/spgist/spgdoinsert.c | 12 ++++-----
src/backend/access/spgist/spginsert.c | 24 +++--------------
src/backend/access/spgist/spgxlog.c | 35 -------------------------
src/include/access/spgxlog.h | 1 -
5 files changed, 10 insertions(+), 67 deletions(-)
diff --git a/src/backend/access/rmgrdesc/spgdesc.c b/src/backend/access/rmgrdesc/spgdesc.c
index 92b1392974..14beb5702f 100644
--- a/src/backend/access/rmgrdesc/spgdesc.c
+++ b/src/backend/access/rmgrdesc/spgdesc.c
@@ -24,8 +24,6 @@ spg_desc(StringInfo buf, XLogReaderState *record)
switch (info)
{
- case XLOG_SPGIST_CREATE_INDEX:
- break;
case XLOG_SPGIST_ADD_LEAF:
{
spgxlogAddLeaf *xlrec = (spgxlogAddLeaf *) rec;
@@ -88,9 +86,6 @@ spg_identify(uint8 info)
switch (info & ~XLR_INFO_MASK)
{
- case XLOG_SPGIST_CREATE_INDEX:
- id = "CREATE_INDEX";
- break;
case XLOG_SPGIST_ADD_LEAF:
id = "ADD_LEAF";
break;
diff --git a/src/backend/access/spgist/spgdoinsert.c b/src/backend/access/spgist/spgdoinsert.c
index 098e09c574..e84ffdfb16 100644
--- a/src/backend/access/spgist/spgdoinsert.c
+++ b/src/backend/access/spgist/spgdoinsert.c
@@ -289,7 +289,7 @@ addLeafTuple(Relation index, SpGistState *state, SpGistLeafTuple leafTuple,
MarkBufferDirty(current->buffer);
- if (RelationNeedsWAL(index))
+ if (RelationNeedsWAL(index) && !state->isBuild)
{
XLogRecPtr recptr;
int flags;
@@ -516,7 +516,7 @@ moveLeafs(Relation index, SpGistState *state,
MarkBufferDirty(current->buffer);
MarkBufferDirty(nbuf);
- if (RelationNeedsWAL(index))
+ if (RelationNeedsWAL(index) && !state->isBuild)
{
XLogRecPtr recptr;
@@ -1334,7 +1334,7 @@ doPickSplit(Relation index, SpGistState *state,
saveCurrent.buffer = InvalidBuffer;
}
- if (RelationNeedsWAL(index))
+ if (RelationNeedsWAL(index) && !state->isBuild)
{
XLogRecPtr recptr;
int flags;
@@ -1531,7 +1531,7 @@ spgAddNodeAction(Relation index, SpGistState *state,
MarkBufferDirty(current->buffer);
- if (RelationNeedsWAL(index))
+ if (RelationNeedsWAL(index) && !state->isBuild)
{
XLogRecPtr recptr;
@@ -1644,7 +1644,7 @@ spgAddNodeAction(Relation index, SpGistState *state,
MarkBufferDirty(saveCurrent.buffer);
- if (RelationNeedsWAL(index))
+ if (RelationNeedsWAL(index) && !state->isBuild)
{
XLogRecPtr recptr;
int flags;
@@ -1840,7 +1840,7 @@ spgSplitNodeAction(Relation index, SpGistState *state,
MarkBufferDirty(current->buffer);
- if (RelationNeedsWAL(index))
+ if (RelationNeedsWAL(index) && !state->isBuild)
{
XLogRecPtr recptr;
diff --git a/src/backend/access/spgist/spginsert.c b/src/backend/access/spgist/spginsert.c
index 7dd0d61fbb..18ee9f8cbe 100644
--- a/src/backend/access/spgist/spginsert.c
+++ b/src/backend/access/spgist/spginsert.c
@@ -21,6 +21,7 @@
#include "access/spgxlog.h"
#include "access/xlog.h"
#include "access/xloginsert.h"
+#include "access/generic_xlog.h"
#include "catalog/index.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"
@@ -104,26 +105,6 @@ spgbuild(Relation heap, Relation index, IndexInfo *indexInfo)
SpGistInitBuffer(nullbuffer, SPGIST_LEAF | SPGIST_NULLS);
MarkBufferDirty(nullbuffer);
- if (RelationNeedsWAL(index))
- {
- XLogRecPtr recptr;
-
- XLogBeginInsert();
-
- /*
- * Replay will re-initialize the pages, so don't take full pages
- * images. No other data to log.
- */
- XLogRegisterBuffer(0, metabuffer, REGBUF_WILL_INIT | REGBUF_STANDARD);
- XLogRegisterBuffer(1, rootbuffer, REGBUF_WILL_INIT | REGBUF_STANDARD);
- XLogRegisterBuffer(2, nullbuffer, REGBUF_WILL_INIT | REGBUF_STANDARD);
-
- recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_CREATE_INDEX);
-
- PageSetLSN(BufferGetPage(metabuffer), recptr);
- PageSetLSN(BufferGetPage(rootbuffer), recptr);
- PageSetLSN(BufferGetPage(nullbuffer), recptr);
- }
END_CRIT_SECTION();
@@ -150,6 +131,9 @@ spgbuild(Relation heap, Relation index, IndexInfo *indexInfo)
SpGistUpdateMetaPage(index);
+ if (RelationNeedsWAL(index))
+ generic_log_relation(index);
+
result = (IndexBuildResult *) palloc0(sizeof(IndexBuildResult));
result->heap_tuples = reltuples;
result->index_tuples = buildstate.indtuples;
diff --git a/src/backend/access/spgist/spgxlog.c b/src/backend/access/spgist/spgxlog.c
index 9e2bd3f811..2c42f1be42 100644
--- a/src/backend/access/spgist/spgxlog.c
+++ b/src/backend/access/spgist/spgxlog.c
@@ -72,38 +72,6 @@ addOrReplaceTuple(Page page, Item tuple, int size, OffsetNumber offset)
size);
}
-static void
-spgRedoCreateIndex(XLogReaderState *record)
-{
- XLogRecPtr lsn = record->EndRecPtr;
- Buffer buffer;
- Page page;
-
- buffer = XLogInitBufferForRedo(record, 0);
- Assert(BufferGetBlockNumber(buffer) == SPGIST_METAPAGE_BLKNO);
- page = (Page) BufferGetPage(buffer);
- SpGistInitMetapage(page);
- PageSetLSN(page, lsn);
- MarkBufferDirty(buffer);
- UnlockReleaseBuffer(buffer);
-
- buffer = XLogInitBufferForRedo(record, 1);
- Assert(BufferGetBlockNumber(buffer) == SPGIST_ROOT_BLKNO);
- SpGistInitBuffer(buffer, SPGIST_LEAF);
- page = (Page) BufferGetPage(buffer);
- PageSetLSN(page, lsn);
- MarkBufferDirty(buffer);
- UnlockReleaseBuffer(buffer);
-
- buffer = XLogInitBufferForRedo(record, 2);
- Assert(BufferGetBlockNumber(buffer) == SPGIST_NULL_BLKNO);
- SpGistInitBuffer(buffer, SPGIST_LEAF | SPGIST_NULLS);
- page = (Page) BufferGetPage(buffer);
- PageSetLSN(page, lsn);
- MarkBufferDirty(buffer);
- UnlockReleaseBuffer(buffer);
-}
-
static void
spgRedoAddLeaf(XLogReaderState *record)
{
@@ -976,9 +944,6 @@ spg_redo(XLogReaderState *record)
oldCxt = MemoryContextSwitchTo(opCtx);
switch (info)
{
- case XLOG_SPGIST_CREATE_INDEX:
- spgRedoCreateIndex(record);
- break;
case XLOG_SPGIST_ADD_LEAF:
spgRedoAddLeaf(record);
break;
diff --git a/src/include/access/spgxlog.h b/src/include/access/spgxlog.h
index b72ccb5cc4..44ad891de3 100644
--- a/src/include/access/spgxlog.h
+++ b/src/include/access/spgxlog.h
@@ -18,7 +18,6 @@
#include "storage/off.h"
/* XLOG record types for SPGiST */
-#define XLOG_SPGIST_CREATE_INDEX 0x00
#define XLOG_SPGIST_ADD_LEAF 0x10
#define XLOG_SPGIST_MOVE_LEAFS 0x20
#define XLOG_SPGIST_ADD_NODE 0x30
--
2.17.1