Hi, Attached is patch which adds ability to do fast-forwarding while decoding. That means wal is consumed as fast as possible and changes are not given to output plugin for sending. The implementation is less invasive than I originally though it would be. Most of it is just additional filter condition in places where we would normally filter out changes because we don't yet have full snapshot.
This is useful for multiple things. It enables us to do the replication slot advance for both physical and logical slots, something that Magnus took stab at some time ago, but does not seem like it went anywhere (this is useful for replication tooling). This patch adds SQL visible pg_replication_slot_advance() function for that use case. It also makes second phase (after we reached SNAPBUILD_FULL_SNAPSHOT) of replication slot creation faster, especially when there are big transactions as the reorder buffer does not have to deal with data changes and does not have to spill to disk. And finally it will be useful for developing failover support of slots. -- Petr Jelinek http://www.2ndQuadrant.com/ PostgreSQL Development, 24x7 Support, Training & Services
From f02cca94098cb7a29a94d409659a74349a8c9c2f Mon Sep 17 00:00:00 2001 From: Petr Jelinek <pjmo...@pjmodos.net> Date: Fri, 29 Dec 2017 09:46:35 +0100 Subject: [PATCH] Add pg_replication_slot_advance() function Advances replication slot to specified position. Works both with logical and physical slots. This also adds fast forward mode for logical decoding which is used by the pg_replication_slot_advance() function as well as slot creation. --- doc/src/sgml/func.sgml | 19 +++ src/backend/replication/logical/decode.c | 40 +++-- src/backend/replication/logical/logical.c | 16 +- src/backend/replication/logical/logicalfuncs.c | 1 + src/backend/replication/slotfuncs.c | 199 +++++++++++++++++++++++++ src/backend/replication/walsender.c | 1 + src/include/catalog/pg_proc.h | 2 + src/include/replication/logical.h | 8 + 8 files changed, 269 insertions(+), 17 deletions(-) diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 4dd9d029e6..67bad68bfc 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -19147,6 +19147,25 @@ postgres=# SELECT * FROM pg_walfile_name_offset(pg_stop_backup()); </entry> </row> + <row> + <entry> + <indexterm> + <primary>pg_replication_slot_advance</primary> + </indexterm> + <literal><function>pg_replication_slot_advance(<parameter>slot_name</parameter> <type>name</type>, <parameter>upto_lsn</parameter> <type>pg_lsn</type>)</function></literal> + </entry> + <entry> + (<parameter>slot_name</parameter> <type>name</type>, <parameter>end_lsn</parameter> <type>pg_lsn</type>) + <type>bool</type> + </entry> + <entry> + Advances the current confirmed position of a replication slot named + <parameter>slot_name</parameter>. The slot will not be moved backwards, + and it will not be moved beyond the current insert location. Returns + name of the slot and real position to which it was advanced to. + </entry> + </row> + <row> <entry id="pg-replication-origin-create"> <indexterm> diff --git a/src/backend/replication/logical/decode.c b/src/backend/replication/logical/decode.c index 486fd0c988..f502e34175 100644 --- a/src/backend/replication/logical/decode.c +++ b/src/backend/replication/logical/decode.c @@ -332,8 +332,10 @@ DecodeStandbyOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) xl_invalidations *invalidations = (xl_invalidations *) XLogRecGetData(r); - ReorderBufferImmediateInvalidation( - ctx->reorder, invalidations->nmsgs, invalidations->msgs); + if (!ctx->fast_forward) + ReorderBufferImmediateInvalidation(ctx->reorder, + invalidations->nmsgs, + invalidations->msgs); } break; default: @@ -353,14 +355,19 @@ DecodeHeap2Op(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) ReorderBufferProcessXid(ctx->reorder, xid, buf->origptr); - /* no point in doing anything yet */ - if (SnapBuildCurrentState(builder) < SNAPBUILD_FULL_SNAPSHOT) + /* + * If we don't have snapshot or we are just fast-forwarding, there is no + * point in decoding changes. + */ + if (SnapBuildCurrentState(builder) < SNAPBUILD_FULL_SNAPSHOT || + ctx->fast_forward) return; switch (info) { case XLOG_HEAP2_MULTI_INSERT: - if (SnapBuildProcessChange(builder, xid, buf->origptr)) + if (!ctx->fast_forward && + SnapBuildProcessChange(builder, xid, buf->origptr)) DecodeMultiInsert(ctx, buf); break; case XLOG_HEAP2_NEW_CID: @@ -408,8 +415,12 @@ DecodeHeapOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) ReorderBufferProcessXid(ctx->reorder, xid, buf->origptr); - /* no point in doing anything yet */ - if (SnapBuildCurrentState(builder) < SNAPBUILD_FULL_SNAPSHOT) + /* + * If we don't have snapshot or we are just fast-forwarding, there is no + * point in decoding data changes. + */ + if (SnapBuildCurrentState(builder) < SNAPBUILD_FULL_SNAPSHOT || + ctx->fast_forward) return; switch (info) @@ -501,8 +512,12 @@ DecodeLogicalMsgOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) ReorderBufferProcessXid(ctx->reorder, XLogRecGetXid(r), buf->origptr); - /* No point in doing anything yet. */ - if (SnapBuildCurrentState(builder) < SNAPBUILD_FULL_SNAPSHOT) + /* + * If we don't have snapshot or we are just fast-forwarding, there is no + * point in decoding messages. + */ + if (SnapBuildCurrentState(builder) < SNAPBUILD_FULL_SNAPSHOT || + ctx->fast_forward) return; message = (xl_logical_message *) XLogRecGetData(r); @@ -554,8 +569,9 @@ DecodeCommit(LogicalDecodingContext *ctx, XLogRecordBuffer *buf, */ if (parsed->nmsgs > 0) { - ReorderBufferAddInvalidations(ctx->reorder, xid, buf->origptr, - parsed->nmsgs, parsed->msgs); + if (!ctx->fast_forward) + ReorderBufferAddInvalidations(ctx->reorder, xid, buf->origptr, + parsed->nmsgs, parsed->msgs); ReorderBufferXidSetCatalogChanges(ctx->reorder, xid, buf->origptr); } @@ -589,7 +605,7 @@ DecodeCommit(LogicalDecodingContext *ctx, XLogRecordBuffer *buf, */ if (SnapBuildXactNeedsSkip(ctx->snapshot_builder, buf->origptr) || (parsed->dbId != InvalidOid && parsed->dbId != ctx->slot->data.database) || - FilterByOrigin(ctx, origin_id)) + ctx->fast_forward || FilterByOrigin(ctx, origin_id)) { for (i = 0; i < parsed->nsubxacts; i++) { diff --git a/src/backend/replication/logical/logical.c b/src/backend/replication/logical/logical.c index bca585fc27..57a168cb5f 100644 --- a/src/backend/replication/logical/logical.c +++ b/src/backend/replication/logical/logical.c @@ -115,6 +115,7 @@ StartupDecodingContext(List *output_plugin_options, XLogRecPtr start_lsn, TransactionId xmin_horizon, bool need_full_snapshot, + bool fast_forward, XLogPageReadCB read_page, LogicalOutputPluginWriterPrepareWrite prepare_write, LogicalOutputPluginWriterWrite do_write, @@ -140,7 +141,8 @@ StartupDecodingContext(List *output_plugin_options, * (re-)load output plugins, so we detect a bad (removed) output plugin * now. */ - LoadOutputPlugin(&ctx->callbacks, NameStr(slot->data.plugin)); + if (!fast_forward) + LoadOutputPlugin(&ctx->callbacks, NameStr(slot->data.plugin)); /* * Now that the slot's xmin has been set, we can announce ourselves as a @@ -191,6 +193,8 @@ StartupDecodingContext(List *output_plugin_options, ctx->output_plugin_options = output_plugin_options; + ctx->fast_forward = fast_forward; + MemoryContextSwitchTo(old_context); return ctx; @@ -303,8 +307,9 @@ CreateInitDecodingContext(char *plugin, ReplicationSlotSave(); ctx = StartupDecodingContext(NIL, InvalidXLogRecPtr, xmin_horizon, - need_full_snapshot, read_page, prepare_write, - do_write, update_progress); + need_full_snapshot, true, + read_page, prepare_write, do_write, + update_progress); /* call output plugin initialization callback */ old_context = MemoryContextSwitchTo(ctx->context); @@ -342,6 +347,7 @@ CreateInitDecodingContext(char *plugin, LogicalDecodingContext * CreateDecodingContext(XLogRecPtr start_lsn, List *output_plugin_options, + bool fast_forward, XLogPageReadCB read_page, LogicalOutputPluginWriterPrepareWrite prepare_write, LogicalOutputPluginWriterWrite do_write, @@ -395,8 +401,8 @@ CreateDecodingContext(XLogRecPtr start_lsn, ctx = StartupDecodingContext(output_plugin_options, start_lsn, InvalidTransactionId, false, - read_page, prepare_write, do_write, - update_progress); + fast_forward, read_page, prepare_write, + do_write, update_progress); /* call output plugin initialization callback */ old_context = MemoryContextSwitchTo(ctx->context); diff --git a/src/backend/replication/logical/logicalfuncs.c b/src/backend/replication/logical/logicalfuncs.c index a3ba2b1266..413c74a36b 100644 --- a/src/backend/replication/logical/logicalfuncs.c +++ b/src/backend/replication/logical/logicalfuncs.c @@ -251,6 +251,7 @@ pg_logical_slot_get_changes_guts(FunctionCallInfo fcinfo, bool confirm, bool bin /* restart at slot's confirmed_flush */ ctx = CreateDecodingContext(InvalidXLogRecPtr, options, + false, logical_read_local_xlog_page, LogicalOutputPrepareWrite, LogicalOutputWrite, NULL); diff --git a/src/backend/replication/slotfuncs.c b/src/backend/replication/slotfuncs.c index ab776e85d2..fac40ee4d6 100644 --- a/src/backend/replication/slotfuncs.c +++ b/src/backend/replication/slotfuncs.c @@ -17,11 +17,14 @@ #include "miscadmin.h" #include "access/htup_details.h" +#include "replication/decode.h" #include "replication/slot.h" #include "replication/logical.h" #include "replication/logicalfuncs.h" #include "utils/builtins.h" +#include "utils/inval.h" #include "utils/pg_lsn.h" +#include "utils/resowner.h" static void check_permissions(void) @@ -312,3 +315,199 @@ pg_get_replication_slots(PG_FUNCTION_ARGS) return (Datum) 0; } + +/* + * Helper function for advancing physical replication slot forward. + */ +static XLogRecPtr +pg_physical_replication_slot_advance(XLogRecPtr startlsn, XLogRecPtr moveto) +{ + XLogRecPtr retlsn = InvalidXLogRecPtr; + + SpinLockAcquire(&MyReplicationSlot->mutex); + if (MyReplicationSlot->data.restart_lsn < moveto) + { + MyReplicationSlot->data.restart_lsn = moveto; + retlsn = moveto; + } + SpinLockRelease(&MyReplicationSlot->mutex); + + return retlsn; +} + +/* + * Helper function for advancing logical replication slot forward. + */ +static XLogRecPtr +pg_logical_replication_slot_advance(XLogRecPtr startlsn, XLogRecPtr moveto) +{ + LogicalDecodingContext *ctx; + ResourceOwner old_resowner = CurrentResourceOwner; + XLogRecPtr retlsn = InvalidXLogRecPtr; + + PG_TRY(); + { + /* restart at slot's confirmed_flush */ + ctx = CreateDecodingContext(InvalidXLogRecPtr, + NIL, + true, + logical_read_local_xlog_page, + NULL, NULL, NULL); + + CurrentResourceOwner = ResourceOwnerCreate(CurrentResourceOwner, + "logical decoding"); + + /* invalidate non-timetravel entries */ + InvalidateSystemCaches(); + + /* Decode until we run out of records */ + while ((startlsn != InvalidXLogRecPtr && startlsn < moveto) || + (ctx->reader->EndRecPtr != InvalidXLogRecPtr && ctx->reader->EndRecPtr < moveto)) + { + XLogRecord *record; + char *errm = NULL; + + record = XLogReadRecord(ctx->reader, startlsn, &errm); + if (errm) + elog(ERROR, "%s", errm); + + /* + * Now that we've set up the xlog reader state, subsequent calls + * pass InvalidXLogRecPtr to say "continue from last record" + */ + startlsn = InvalidXLogRecPtr; + + /* + * The {begin_txn,change,commit_txn}_wrapper callbacks above will + * store the description into our tuplestore. + */ + if (record != NULL) + LogicalDecodingProcessRecord(ctx, ctx->reader); + + /* check limits */ + if (moveto <= ctx->reader->EndRecPtr) + break; + + CHECK_FOR_INTERRUPTS(); + } + + CurrentResourceOwner = old_resowner; + + if (ctx->reader->EndRecPtr != InvalidXLogRecPtr) + { + LogicalConfirmReceivedLocation(moveto); + + /* + * If only the confirmed_flush_lsn has changed the slot won't get + * marked as dirty by the above. Callers on the walsender + * interface are expected to keep track of their own progress and + * don't need it written out. But SQL-interface users cannot + * specify their own start positions and it's harder for them to + * keep track of their progress, so we should make more of an + * effort to save it for them. + * + * Dirty the slot so it's written out at the next checkpoint. + * We'll still lose its position on crash, as documented, but it's + * better than always losing the position even on clean restart. + */ + ReplicationSlotMarkDirty(); + } + + retlsn = MyReplicationSlot->data.confirmed_flush; + + /* free context, call shutdown callback */ + FreeDecodingContext(ctx); + + InvalidateSystemCaches(); + } + PG_CATCH(); + { + /* clear all timetravel entries */ + InvalidateSystemCaches(); + + PG_RE_THROW(); + } + PG_END_TRY(); + + return retlsn; +} + +/* + * SQL function for moving the position in a replication slot. + */ +Datum +pg_replication_slot_advance(PG_FUNCTION_ARGS) +{ + Name slotname = PG_GETARG_NAME(0); + XLogRecPtr moveto = PG_GETARG_LSN(1); + XLogRecPtr endlsn; + XLogRecPtr startlsn; + TupleDesc tupdesc; + Datum values[2]; + bool nulls[2]; + HeapTuple tuple; + Datum result; + + Assert(!MyReplicationSlot); + + check_permissions(); + + if (XLogRecPtrIsInvalid(moveto)) + ereport(ERROR, + (errmsg("invalid target wal lsn"))); + + /* Build a tuple descriptor for our result type */ + if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + /* + * Compute the current end-of-wal and maintain ThisTimeLineID. + * RecoveryInProgress() will update ThisTimeLineID on promotion. + */ + if (!RecoveryInProgress()) + moveto = GetFlushRecPtr(); + else + moveto = GetXLogReplayRecPtr(&ThisTimeLineID); + + /* Acquire the slot so we "own" it */ + ReplicationSlotAcquire(NameStr(*slotname), true); + + startlsn = MyReplicationSlot->data.confirmed_flush; + if (moveto < startlsn) + { + ReplicationSlotRelease(); + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot move slot to %X/%X, minimum is %X/%X", + (uint32) (moveto >> 32), (uint32) moveto, + (uint32) (MyReplicationSlot->data.confirmed_flush >> 32), + (uint32) (MyReplicationSlot->data.confirmed_flush)))); + } + + if (OidIsValid(MyReplicationSlot->data.database)) + endlsn = pg_logical_replication_slot_advance(startlsn, moveto); + else + endlsn = pg_physical_replication_slot_advance(startlsn, moveto); + + values[0] = NameGetDatum(&MyReplicationSlot->data.name); + nulls[0] = false; + + /* Update the on disk state when lsn was updated. */ + if (XLogRecPtrIsInvalid(endlsn)) + { + ReplicationSlotMarkDirty(); + ReplicationSlotsComputeRequiredXmin(false); + ReplicationSlotsComputeRequiredLSN(); + ReplicationSlotSave(); + } + + ReplicationSlotRelease(); + + /* Return the reached position. */ + values[1] = LSNGetDatum(endlsn); + nulls[1] = false; + + tuple = heap_form_tuple(tupdesc, values, nulls); + result = HeapTupleGetDatum(tuple); + + PG_RETURN_DATUM(result); +} diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c index f2e886f99f..f1ab74103b 100644 --- a/src/backend/replication/walsender.c +++ b/src/backend/replication/walsender.c @@ -1074,6 +1074,7 @@ StartLogicalReplication(StartReplicationCmd *cmd) * to be shipped from that position. */ logical_decoding_ctx = CreateDecodingContext(cmd->startpoint, cmd->options, + false, logical_read_xlog_page, WalSndPrepareWrite, WalSndWriteData, diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index 830bab37ea..87c7e18b8a 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -5357,6 +5357,8 @@ DATA(insert OID = 3784 ( pg_logical_slot_peek_changes PGNSP PGUID 12 1000 1000 DESCR("peek at changes from replication slot"); DATA(insert OID = 3785 ( pg_logical_slot_peek_binary_changes PGNSP PGUID 12 1000 1000 25 0 f f f f f t v u 4 0 2249 "19 3220 23 1009" "{19,3220,23,1009,3220,28,17}" "{i,i,i,v,o,o,o}" "{slot_name,upto_lsn,upto_nchanges,options,lsn,xid,data}" _null_ _null_ pg_logical_slot_peek_binary_changes _null_ _null_ _null_ )); DESCR("peek at binary changes from replication slot"); +DATA(insert OID = 3878 ( pg_replication_slot_advance PGNSP PGUID 12 1 0 0 0 f f f f t f v u 2 0 2249 "19 3220" "{19,3220,19,3220}" "{i,i,o,o}" "{slot_name,upto_lsn,slot_name,end_lsn}" _null_ _null_ pg_replication_slot_advance _null_ _null_ _null_ )); +DESCR("advance logical replication slot"); DATA(insert OID = 3577 ( pg_logical_emit_message PGNSP PGUID 12 1 0 0 0 f f f f t f v u 3 0 3220 "16 25 25" _null_ _null_ _null_ _null_ _null_ pg_logical_emit_message_text _null_ _null_ _null_ )); DESCR("emit a textual logical decoding message"); DATA(insert OID = 3578 ( pg_logical_emit_message PGNSP PGUID 12 1 0 0 0 f f f f t f v u 3 0 3220 "16 25 17" _null_ _null_ _null_ _null_ _null_ pg_logical_emit_message_bytea _null_ _null_ _null_ )); diff --git a/src/include/replication/logical.h b/src/include/replication/logical.h index 7f0e0fa881..a52bfe4da4 100644 --- a/src/include/replication/logical.h +++ b/src/include/replication/logical.h @@ -45,6 +45,13 @@ typedef struct LogicalDecodingContext struct ReorderBuffer *reorder; struct SnapBuild *snapshot_builder; + /* + * Marks the logical decoding context as fast forward decoding one. + * Such a context does not have plugin loaded so most of the the following + * properties are unused. + */ + bool fast_forward; + OutputPluginCallbacks callbacks; OutputPluginOptions options; @@ -97,6 +104,7 @@ extern LogicalDecodingContext *CreateInitDecodingContext(char *plugin, extern LogicalDecodingContext *CreateDecodingContext( XLogRecPtr start_lsn, List *output_plugin_options, + bool fast_forward, XLogPageReadCB read_page, LogicalOutputPluginWriterPrepareWrite prepare_write, LogicalOutputPluginWriterWrite do_write, -- 2.14.1