On Wed, Feb 3, 2021 at 08:07:16PM -0500, Bruce Momjian wrote: > > > I can try to add a hint-bit-page-write page counter, but that might > > > overflow, and then we will need a way to change the LSN anyway. > > > > That's just a question of width... > > Yeah, the hint bit counter is just delaying the inevitable, plus it > changes the page format, which I am trying to avoid. Also, I need this > dummy record only if the page is marked clean, meaning a write > to the file system already happened in the current checkpoint --- should > not be to bad.
Here is a proof-of-concept patch to do this. Thanks for your help. -- Bruce Momjian <br...@momjian.us> https://momjian.us EDB https://enterprisedb.com The usefulness of a cup is in its emptiness, Bruce Lee
>From 11cb0ca7405a74105602fd63766e88c1fea8ea30 Mon Sep 17 00:00:00 2001 From: Bruce Momjian <br...@momjian.us> Date: Wed, 3 Feb 2021 22:25:39 -0500 Subject: [PATCH] hint squash commit --- src/backend/access/rmgrdesc/xlogdesc.c | 6 +++++- src/backend/access/transam/xlog.c | 4 ++++ src/backend/access/transam/xloginsert.c | 26 ++++++++++++++++++++++++ src/backend/replication/logical/decode.c | 1 + src/backend/storage/buffer/bufmgr.c | 18 ++++++++++++++++ src/include/access/xloginsert.h | 2 ++ src/include/catalog/pg_control.h | 1 + 7 files changed, 57 insertions(+), 1 deletion(-) diff --git a/src/backend/access/rmgrdesc/xlogdesc.c b/src/backend/access/rmgrdesc/xlogdesc.c index 92cc7ea073..16a967bb71 100644 --- a/src/backend/access/rmgrdesc/xlogdesc.c +++ b/src/backend/access/rmgrdesc/xlogdesc.c @@ -80,7 +80,8 @@ xlog_desc(StringInfo buf, XLogReaderState *record) appendStringInfoString(buf, xlrec->rp_name); } - else if (info == XLOG_FPI || info == XLOG_FPI_FOR_HINT) + else if (info == XLOG_FPI || info == XLOG_FPI_FOR_HINT || + info == XLOG_HINT_LSN) { /* no further information to print */ } @@ -185,6 +186,9 @@ xlog_identify(uint8 info) case XLOG_FPI_FOR_HINT: id = "FPI_FOR_HINT"; break; + case XLOG_HINT_LSN: + id = "HINT_LSN"; + break; } return id; diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index f03bd473e2..f67316ee07 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -10260,6 +10260,10 @@ xlog_redo(XLogReaderState *record) UnlockReleaseBuffer(buffer); } } + else if (info == XLOG_HINT_LSN) + { + /* nothing to do here */ + } else if (info == XLOG_BACKUP_END) { XLogRecPtr startpoint; diff --git a/src/backend/access/transam/xloginsert.c b/src/backend/access/transam/xloginsert.c index 7052dc245e..eebca0fc16 100644 --- a/src/backend/access/transam/xloginsert.c +++ b/src/backend/access/transam/xloginsert.c @@ -980,6 +980,32 @@ XLogSaveBufferForHint(Buffer buffer, bool buffer_std) return recptr; } +/* + * On the first hint bit change during a checkpoint, XLogSaveBufferForHint() + * writes a full page image to the WAL and returns a new LSN which can be + * assigned to the page. Cluster file encryption needs a new LSN for + * every hint bit page write because the LSN is used in the nonce, and + * the nonce must be unique. Therefore, this routine just advances the LSN + * so the page can be assigned a new LSN. + * + * Callable while holding just share lock on the buffer content. + * + * The LSN of the inserted wal record is returned if we had to write, + * InvalidXLogRecPtr otherwise. + * + * It is possible that multiple concurrent backends could attempt to write WAL + * records. In that case, multiple copies of the same block would be recorded + * in separate WAL records by different backends, though that is still OK from + * a correctness perspective. + */ +XLogRecPtr +XLogLSNForHint(void) +{ + XLogBeginInsert(); + + return XLogInsert(RM_XLOG_ID, XLOG_HINT_LSN); +} + /* * Write a WAL record containing a full image of a page. Caller is responsible * for writing the page to disk after calling this routine. diff --git a/src/backend/replication/logical/decode.c b/src/backend/replication/logical/decode.c index afa1df00d0..2ada89c6b1 100644 --- a/src/backend/replication/logical/decode.c +++ b/src/backend/replication/logical/decode.c @@ -223,6 +223,7 @@ DecodeXLogOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) case XLOG_FPW_CHANGE: case XLOG_FPI_FOR_HINT: case XLOG_FPI: + case XLOG_HINT_LSN: break; default: elog(ERROR, "unexpected RM_XLOG_ID record type: %u", info); diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 561c212092..fd59f9fc1d 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -3865,6 +3865,24 @@ MarkBufferDirtyHint(Buffer buffer, bool buffer_std) { dirtied = true; /* Means "will be dirtied by this action" */ + /* + * If the buffer is clean, and lsn is valid, it must be the + * first hint bit change of the checkpoint (the old page lsn + * is earlier than the RedoRecPtr). If the page is clean and + * the lsn is invalid, the page must have been already written + * during this checkpoint, and this is a new hint bit change. + * Such page changes usually don't create new page lsns, but we + * need one for cluster file encryption because the lsn is used as + * part of the nonce, and we don't want to reencrypt the page + * with the hint bit changes using the same nonce as the previous + * writes during this checkpoint. (It would expose the hint bit + * change locations.) Therefore we create a simple WAL record + * to advance the lsn, which can can then be assigned to the + * page. + */ + if (XLogRecPtrIsInvalid(lsn) /* XXX && file_encryption_method != 0 */) + lsn = XLogLSNForHint(); + /* * Set the page LSN if we wrote a backup block. We aren't supposed * to set this when only holding a share lock but as long as we diff --git a/src/include/access/xloginsert.h b/src/include/access/xloginsert.h index f1d8c39edf..a066f65229 100644 --- a/src/include/access/xloginsert.h +++ b/src/include/access/xloginsert.h @@ -61,6 +61,8 @@ extern void log_newpage_range(Relation rel, ForkNumber forkNum, BlockNumber startblk, BlockNumber endblk, bool page_std); extern XLogRecPtr XLogSaveBufferForHint(Buffer buffer, bool buffer_std); +extern XLogRecPtr XLogLSNForHint(void); + extern void InitXLogInsert(void); #endif /* XLOGINSERT_H */ diff --git a/src/include/catalog/pg_control.h b/src/include/catalog/pg_control.h index e3f48158ce..36ac7dfbb8 100644 --- a/src/include/catalog/pg_control.h +++ b/src/include/catalog/pg_control.h @@ -76,6 +76,7 @@ typedef struct CheckPoint #define XLOG_END_OF_RECOVERY 0x90 #define XLOG_FPI_FOR_HINT 0xA0 #define XLOG_FPI 0xB0 +#define XLOG_HINT_LSN 0xC0 /* -- 2.20.1