Michael Paquier wrote: > > diff --git a/src/backend/access/transam/xlog.c > > b/src/backend/access/transam/xlog.c > > index 7375a78ffc..3a1f49e83a 100644 > > --- a/src/backend/access/transam/xlog.c > > +++ b/src/backend/access/transam/xlog.c > > @@ -81,8 +81,6 @@ extern uint32 bootstrap_data_checksum_version; > > /* File path names (all relative to $PGDATA) */ > > #define RECOVERY_COMMAND_FILE "recovery.conf" > > #define RECOVERY_COMMAND_DONE "recovery.done" > > -#define PROMOTE_SIGNAL_FILE "promote" > > -#define FALLBACK_PROMOTE_SIGNAL_FILE "fallback_promote" > > Perhaps we could just move the whole set to xlog.h.
Done. > > +Datum > > +pg_promote(PG_FUNCTION_ARGS) > > +{ > > + FILE *promote_file; > > + > > + if (!superuser()) > > + ereport(ERROR, > > + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), > > + errmsg("must be superuser to promote standby > > servers"))); > > Let's remove this restriction at code level, and instead use > function-level ACLs so as it is possible to allow non-superusers to > trigger a promotion as well, say a dedicated role. We could add a > system role for this purpose, but I am not sure that it is worth the > addition yet. Agreed, done. > > + while ($self->safe_psql('postgres', "SELECT pg_is_in_recovery()") != > > 'f') > > + { > > + sleep 1; > > + } > > + return; > > This could go on infinitely, locking down buildfarm machines silently if > a commit is not careful. What I would suggest to do instead is to not > touch PostgresNode.pm at all, and to just modify one test to trigger > promotion with the SQL function. Then from the test, you should add a > comment that triggerring promotion from SQL is wanted instead of the > internal routine, and then please add a call to poll_query_until() in > the same way as what 6deb52b2 has removed. I have modified recovery/t/004_timeline_switch.pl and recovery/t/009_twophase.pl accordingly: one calls the function with "wait => true", the other uses "wait => false" and waits as you suggested. > As of now, this function returns true if promotion has been triggered, > and false if not. However it seems to me that we should have something > more consistent with "pg_ctl promote", so there are actually three > possible states: > 1) Node is in recovery, with promotion triggered. pg_promote() returns > true in case of success in creating the trigger file. > 2) Node is in recovery, with promotion triggered. pg_promote() returns > false in case of failure in creating the trigger file. > 3) Node is not in recovery, where I think a call to pg_promote should > trigger an error. This is not handled in the patch. So far I had returned "false" in the last case, but I am fine with throwing an error in that case. Done. > An other thing which has value is to implement a "wait" mode for this > feature, or actually a nowait mode. You could simply do what pg_ctl > does with its logic in get_control_dbstate() by looking at the control > file state. I think that waiting for the promotion to happen should be > the default. I have implemented that. > At the end this patch needs a bit more work. Yes, it did. Thanks for the thorough review! Yours, Laurenz Albe
From 8e45dc7dfbb76eb1625323e3cd8f161779973528 Mon Sep 17 00:00:00 2001 From: Laurenz Albe <laurenz.a...@cybertec.at> Date: Mon, 15 Oct 2018 16:07:56 +0200 Subject: [PATCH] Add pg_promote() to promote standby servers --- doc/src/sgml/func.sgml | 21 +++++++ doc/src/sgml/high-availability.sgml | 2 +- doc/src/sgml/recovery-config.sgml | 3 +- src/backend/access/transam/xlog.c | 6 -- src/backend/access/transam/xlogfuncs.c | 66 ++++++++++++++++++++++ src/backend/catalog/system_views.sql | 7 +++ src/include/access/xlog.h | 6 ++ src/include/catalog/pg_proc.dat | 4 ++ src/test/recovery/t/004_timeline_switch.pl | 6 +- src/test/recovery/t/009_twophase.pl | 6 +- 10 files changed, 116 insertions(+), 11 deletions(-) diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 9a7f683658..5ecf7f5b9d 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -18731,6 +18731,9 @@ SELECT set_config('log_statement_stats', 'off', false); <indexterm> <primary>pg_terminate_backend</primary> </indexterm> + <indexterm> + <primary>pg_promote</primary> + </indexterm> <indexterm> <primary>signal</primary> @@ -18790,6 +18793,16 @@ SELECT set_config('log_statement_stats', 'off', false); however only superusers can terminate superuser backends. </entry> </row> + <row> + <entry> + <literal><function>pg_promote(<parameter>wait</parameter> <type>boolean</type> DEFAULT true)</function></literal> + </entry> + <entry><type>boolean</type></entry> + <entry>Promote a physical standby server. This function is restricted to + superusers by default, but other users can be granted EXECUTE to run + the function. + </entry> + </row> </tbody> </tgroup> </table> @@ -18827,6 +18840,14 @@ SELECT set_config('log_statement_stats', 'off', false); subprocess. </para> + <para> + <function>pg_promote</function> can only be called on standby servers. + If the argument <parameter>wait</parameter> is <literal>true</literal>, + the function waits until promotion is complete or 60 seconds have passed, + otherwise the function returns immediately after sending the promotion + signal to the postmaster. + </para> + </sect2> <sect2 id="functions-admin-backup"> diff --git a/doc/src/sgml/high-availability.sgml b/doc/src/sgml/high-availability.sgml index 6f57362df7..8ccd1ffcd1 100644 --- a/doc/src/sgml/high-availability.sgml +++ b/doc/src/sgml/high-availability.sgml @@ -1472,7 +1472,7 @@ synchronous_standby_names = 'ANY 2 (s1, s2, s3)' <para> To trigger failover of a log-shipping standby server, - run <command>pg_ctl promote</command> or create a trigger + run <command>pg_ctl promote</command>, call <function>pg_promote()</function>, or create a trigger file with the file name and path specified by the <varname>trigger_file</varname> setting in <filename>recovery.conf</filename>. If you're planning to use <command>pg_ctl promote</command> to fail over, <varname>trigger_file</varname> is diff --git a/doc/src/sgml/recovery-config.sgml b/doc/src/sgml/recovery-config.sgml index 92825fdf19..d06cd0b08e 100644 --- a/doc/src/sgml/recovery-config.sgml +++ b/doc/src/sgml/recovery-config.sgml @@ -439,7 +439,8 @@ restore_command = 'copy "C:\\server\\archivedir\\%f" "%p"' # Windows <para> Specifies a trigger file whose presence ends recovery in the standby. Even if this value is not set, you can still promote - the standby using <command>pg_ctl promote</command>. + the standby using <command>pg_ctl promote</command> or calling + <function>pg_promote()</function>. This setting has no effect if <varname>standby_mode</varname> is <literal>off</literal>. </para> </listitem> diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 7375a78ffc..62fc418893 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -78,12 +78,6 @@ extern uint32 bootstrap_data_checksum_version; -/* File path names (all relative to $PGDATA) */ -#define RECOVERY_COMMAND_FILE "recovery.conf" -#define RECOVERY_COMMAND_DONE "recovery.done" -#define PROMOTE_SIGNAL_FILE "promote" -#define FALLBACK_PROMOTE_SIGNAL_FILE "fallback_promote" - /* User-settable parameters */ int max_wal_size_mb = 1024; /* 1 GB */ diff --git a/src/backend/access/transam/xlogfuncs.c b/src/backend/access/transam/xlogfuncs.c index 9731742978..7cd5db001e 100644 --- a/src/backend/access/transam/xlogfuncs.c +++ b/src/backend/access/transam/xlogfuncs.c @@ -35,6 +35,7 @@ #include "storage/fd.h" #include "storage/ipc.h" +#include <unistd.h> /* * Store label file and tablespace map during non-exclusive backups. @@ -697,3 +698,68 @@ pg_backup_start_time(PG_FUNCTION_ARGS) PG_RETURN_DATUM(xtime); } + +#define WAIT_SECONDS 60 +#define WAITS_PER_SECOND 10 + +/* + * Promote a standby server. + * + * A result of "true" means that promotion has been initiated. + */ +Datum +pg_promote(PG_FUNCTION_ARGS) +{ + bool wait = PG_GETARG_BOOL(0); + FILE *promote_file; + int i; + + if (!RecoveryInProgress()) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("recovery is not in progress"), + errhint("Only a server that is in recovery can be promoted."))); + + /* create the promote signal file */ + promote_file = AllocateFile(PROMOTE_SIGNAL_FILE, "w"); + if (!promote_file) + { + ereport(WARNING, + (errmsg("could not create file \"%s\": %m", PROMOTE_SIGNAL_FILE))); + PG_RETURN_BOOL(false); + } + + if (FreeFile(promote_file)) + { + /* probably unreachable, but it is better to be safe */ + ereport(WARNING, + (errmsg("could not write to file \"%s\": %m", PROMOTE_SIGNAL_FILE))); + PG_RETURN_BOOL(false); + } + + /* signal the postmaster */ + if (kill(PostmasterPid, SIGUSR1) != 0) + { + ereport(WARNING, + (errmsg("failed to send signal to postmaster: %m"))); + (void) unlink(PROMOTE_SIGNAL_FILE); + PG_RETURN_BOOL(false); + } + + /* return immediately if waiting was not requested */ + if (wait) + PG_RETURN_BOOL(true); + + /* wait for up to a minute for promotion */ + for (i = 0; i < WAITS_PER_SECOND * WAIT_SECONDS; ++i) + { + if (!RecoveryInProgress()) + PG_RETURN_BOOL(true); + + pg_usleep(1000000L / WAITS_PER_SECOND); + } + + ereport(WARNING, + (errmsg("server did not promote within %d seconds", WAIT_SECONDS))); + PG_RETURN_BOOL(false); +} diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index 7251552419..9c9cf84c10 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -1119,6 +1119,13 @@ LANGUAGE INTERNAL STRICT IMMUTABLE PARALLEL SAFE AS 'jsonb_insert'; +CREATE OR REPLACE FUNCTION + pg_promote(wait boolean DEFAULT true) +RETURNS boolean +LANGUAGE INTERNAL +STRICT VOLATILE +AS 'pg_promote'; + -- -- The default permissions for functions mean that anyone can execute them. -- A number of functions shouldn't be executable by just anyone, but rather diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index 421ba6d775..e01d12eb7c 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -319,10 +319,16 @@ extern void do_pg_abort_backup(void); extern SessionBackupState get_backup_status(void); /* File path names (all relative to $PGDATA) */ +#define RECOVERY_COMMAND_FILE "recovery.conf" +#define RECOVERY_COMMAND_DONE "recovery.done" #define BACKUP_LABEL_FILE "backup_label" #define BACKUP_LABEL_OLD "backup_label.old" #define TABLESPACE_MAP "tablespace_map" #define TABLESPACE_MAP_OLD "tablespace_map.old" +/* files to signal promotion to primary */ +#define PROMOTE_SIGNAL_FILE "promote" +#define FALLBACK_PROMOTE_SIGNAL_FILE "fallback_promote" + #endif /* XLOG_H */ diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 8e4145f42b..1598f930a4 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -5997,6 +5997,10 @@ proname => 'pg_backup_start_time', provolatile => 's', prorettype => 'timestamptz', proargtypes => '', prosrc => 'pg_backup_start_time' }, +{ oid => '3436', descr => 'promote standby server', + proname => 'pg_promote', provolatile => 'v', + prorettype => 'bool', proargtypes => 'bool', proargnames => '{wait}', + prosrc => 'pg_promote' }, { oid => '2848', descr => 'switch to new wal file', proname => 'pg_switch_wal', provolatile => 'v', prorettype => 'pg_lsn', proargtypes => '', prosrc => 'pg_switch_wal' }, diff --git a/src/test/recovery/t/004_timeline_switch.pl b/src/test/recovery/t/004_timeline_switch.pl index 34ee335129..fd440ca9dd 100644 --- a/src/test/recovery/t/004_timeline_switch.pl +++ b/src/test/recovery/t/004_timeline_switch.pl @@ -37,9 +37,11 @@ $node_master->safe_psql('postgres', $node_master->wait_for_catchup($node_standby_1, 'replay', $node_master->lsn('write')); -# Stop and remove master, and promote standby 1, switching it to a new timeline +# Stop and remove master $node_master->teardown_node; -$node_standby_1->promote; + +# promote standby 1 using "pg_promote", switching it to a new timeline +$node_standby_1->safe_psql('postgres', "SELECT pg_promote()"); # Switch standby 2 to replay from standby 1 rmtree($node_standby_2->data_dir . '/recovery.conf'); diff --git a/src/test/recovery/t/009_twophase.pl b/src/test/recovery/t/009_twophase.pl index 9ea3bd65fc..a6044179bc 100644 --- a/src/test/recovery/t/009_twophase.pl +++ b/src/test/recovery/t/009_twophase.pl @@ -214,7 +214,11 @@ $cur_master->psql( INSERT INTO t_009_tbl VALUES (22, 'issued to ${cur_master_name}'); PREPARE TRANSACTION 'xact_009_10';"); $cur_master->teardown_node; -$cur_standby->promote; + +# promote standby using "pg_promote" and wait until it is promoted +$cur_standby->safe_psql('postgres', 'SELECT pg_promote(FALSE)'); +$cur_standby->poll_query_until('postgres', "SELECT NOT pg_is_in_recovery()") + or die "standby never exited recovery"; # change roles note "Now paris is master and london is standby"; -- 2.17.2