Michael Paquier wrote:
> > diff --git a/src/backend/access/transam/xlog.c 
> > b/src/backend/access/transam/xlog.c
> > index 7375a78ffc..3a1f49e83a 100644
> > --- a/src/backend/access/transam/xlog.c
> > +++ b/src/backend/access/transam/xlog.c
> > @@ -81,8 +81,6 @@ extern uint32 bootstrap_data_checksum_version;
> >  /* File path names (all relative to $PGDATA) */
> >  #define RECOVERY_COMMAND_FILE      "recovery.conf"
> >  #define RECOVERY_COMMAND_DONE      "recovery.done"
> > -#define PROMOTE_SIGNAL_FILE                "promote"
> > -#define FALLBACK_PROMOTE_SIGNAL_FILE "fallback_promote"
> 
> Perhaps we could just move the whole set to xlog.h.

Done.

> > +Datum
> > +pg_promote(PG_FUNCTION_ARGS)
> > +{
> > +   FILE *promote_file;
> > +
> > +   if (!superuser())
> > +           ereport(ERROR,
> > +                           (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
> > +                            errmsg("must be superuser to promote standby 
> > servers")));
> 
> Let's remove this restriction at code level, and instead use
> function-level ACLs so as it is possible to allow non-superusers to
> trigger a promotion as well, say a dedicated role.  We could add a
> system role for this purpose, but I am not sure that it is worth the
> addition yet.

Agreed, done.

> > +   while ($self->safe_psql('postgres', "SELECT pg_is_in_recovery()") != 
> > 'f')
> > +   {
> > +            sleep 1;
> > +   }
> > +   return;
> 
> This could go on infinitely, locking down buildfarm machines silently if
> a commit is not careful.  What I would suggest to do instead is to not
> touch PostgresNode.pm at all, and to just modify one test to trigger
> promotion with the SQL function.  Then from the test, you should add a
> comment that triggerring promotion from SQL is wanted instead of the
> internal routine, and then please add a call to poll_query_until() in
> the same way as what 6deb52b2 has removed.

I have modified recovery/t/004_timeline_switch.pl and recovery/t/009_twophase.pl
accordingly: one calls the function with "wait => true", the other
uses "wait => false" and waits as you suggested.

> As of now, this function returns true if promotion has been triggered,
> and false if not.  However it seems to me that we should have something
> more consistent with "pg_ctl promote", so there are actually three
> possible states:
> 1) Node is in recovery, with promotion triggered.  pg_promote() returns
> true in case of success in creating the trigger file.
> 2) Node is in recovery, with promotion triggered.  pg_promote() returns
> false in case of failure in creating the trigger file.
> 3) Node is not in recovery, where I think a call to pg_promote should
> trigger an error.  This is not handled in the patch.

So far I had returned "false" in the last case, but I am fine with
throwing an error in that case.  Done.

> An other thing which has value is to implement a "wait" mode for this
> feature, or actually a nowait mode.  You could simply do what pg_ctl
> does with its logic in get_control_dbstate() by looking at the control
> file state.  I think that waiting for the promotion to happen should be
> the default.

I have implemented that.

> At the end this patch needs a bit more work.

Yes, it did.  Thanks for the thorough review!

Yours,
Laurenz Albe
From 8e45dc7dfbb76eb1625323e3cd8f161779973528 Mon Sep 17 00:00:00 2001
From: Laurenz Albe <laurenz.a...@cybertec.at>
Date: Mon, 15 Oct 2018 16:07:56 +0200
Subject: [PATCH] Add pg_promote() to promote standby servers

---
 doc/src/sgml/func.sgml                     | 21 +++++++
 doc/src/sgml/high-availability.sgml        |  2 +-
 doc/src/sgml/recovery-config.sgml          |  3 +-
 src/backend/access/transam/xlog.c          |  6 --
 src/backend/access/transam/xlogfuncs.c     | 66 ++++++++++++++++++++++
 src/backend/catalog/system_views.sql       |  7 +++
 src/include/access/xlog.h                  |  6 ++
 src/include/catalog/pg_proc.dat            |  4 ++
 src/test/recovery/t/004_timeline_switch.pl |  6 +-
 src/test/recovery/t/009_twophase.pl        |  6 +-
 10 files changed, 116 insertions(+), 11 deletions(-)

diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 9a7f683658..5ecf7f5b9d 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -18731,6 +18731,9 @@ SELECT set_config('log_statement_stats', 'off', false);
    <indexterm>
     <primary>pg_terminate_backend</primary>
    </indexterm>
+   <indexterm>
+    <primary>pg_promote</primary>
+   </indexterm>
 
    <indexterm>
     <primary>signal</primary>
@@ -18790,6 +18793,16 @@ SELECT set_config('log_statement_stats', 'off', false);
         however only superusers can terminate superuser backends.
        </entry>
       </row>
+      <row>
+       <entry>
+        <literal><function>pg_promote(<parameter>wait</parameter> <type>boolean</type> DEFAULT true)</function></literal>
+        </entry>
+       <entry><type>boolean</type></entry>
+       <entry>Promote a physical standby server.  This function is restricted to
+        superusers by default, but other users can be granted EXECUTE to run
+        the function.
+       </entry>
+      </row>
      </tbody>
     </tgroup>
    </table>
@@ -18827,6 +18840,14 @@ SELECT set_config('log_statement_stats', 'off', false);
     subprocess.
    </para>
 
+   <para>
+    <function>pg_promote</function> can only be called on standby servers.
+    If the argument <parameter>wait</parameter> is <literal>true</literal>,
+    the function waits until promotion is complete or 60 seconds have passed,
+    otherwise the function returns immediately after sending the promotion
+    signal to the postmaster.
+   </para>
+
   </sect2>
 
   <sect2 id="functions-admin-backup">
diff --git a/doc/src/sgml/high-availability.sgml b/doc/src/sgml/high-availability.sgml
index 6f57362df7..8ccd1ffcd1 100644
--- a/doc/src/sgml/high-availability.sgml
+++ b/doc/src/sgml/high-availability.sgml
@@ -1472,7 +1472,7 @@ synchronous_standby_names = 'ANY 2 (s1, s2, s3)'
 
    <para>
     To trigger failover of a log-shipping standby server,
-    run <command>pg_ctl promote</command> or create a trigger
+    run <command>pg_ctl promote</command>, call <function>pg_promote()</function>, or create a trigger
     file with the file name and path specified by the <varname>trigger_file</varname>
     setting in <filename>recovery.conf</filename>. If you're planning to use
     <command>pg_ctl promote</command> to fail over, <varname>trigger_file</varname> is
diff --git a/doc/src/sgml/recovery-config.sgml b/doc/src/sgml/recovery-config.sgml
index 92825fdf19..d06cd0b08e 100644
--- a/doc/src/sgml/recovery-config.sgml
+++ b/doc/src/sgml/recovery-config.sgml
@@ -439,7 +439,8 @@ restore_command = 'copy "C:\\server\\archivedir\\%f" "%p"'  # Windows
          <para>
           Specifies a trigger file whose presence ends recovery in the
           standby.  Even if this value is not set, you can still promote
-          the standby using <command>pg_ctl promote</command>.
+          the standby using <command>pg_ctl promote</command> or calling
+          <function>pg_promote()</function>.
           This setting has no effect if <varname>standby_mode</varname> is <literal>off</literal>.
          </para>
         </listitem>
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 7375a78ffc..62fc418893 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -78,12 +78,6 @@
 
 extern uint32 bootstrap_data_checksum_version;
 
-/* File path names (all relative to $PGDATA) */
-#define RECOVERY_COMMAND_FILE	"recovery.conf"
-#define RECOVERY_COMMAND_DONE	"recovery.done"
-#define PROMOTE_SIGNAL_FILE		"promote"
-#define FALLBACK_PROMOTE_SIGNAL_FILE "fallback_promote"
-
 
 /* User-settable parameters */
 int			max_wal_size_mb = 1024; /* 1 GB */
diff --git a/src/backend/access/transam/xlogfuncs.c b/src/backend/access/transam/xlogfuncs.c
index 9731742978..7cd5db001e 100644
--- a/src/backend/access/transam/xlogfuncs.c
+++ b/src/backend/access/transam/xlogfuncs.c
@@ -35,6 +35,7 @@
 #include "storage/fd.h"
 #include "storage/ipc.h"
 
+#include <unistd.h>
 
 /*
  * Store label file and tablespace map during non-exclusive backups.
@@ -697,3 +698,68 @@ pg_backup_start_time(PG_FUNCTION_ARGS)
 
 	PG_RETURN_DATUM(xtime);
 }
+
+#define WAIT_SECONDS 60
+#define WAITS_PER_SECOND 10
+
+/*
+ * Promote a standby server.
+ *
+ * A result of "true" means that promotion has been initiated.
+ */
+Datum
+pg_promote(PG_FUNCTION_ARGS)
+{
+	bool wait = PG_GETARG_BOOL(0);
+	FILE *promote_file;
+	int i;
+
+	if (!RecoveryInProgress())
+		ereport(ERROR,
+				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+				 errmsg("recovery is not in progress"),
+				 errhint("Only a server that is in recovery can be promoted.")));
+
+	/* create the promote signal file */
+	promote_file = AllocateFile(PROMOTE_SIGNAL_FILE, "w");
+	if (!promote_file)
+	{
+		ereport(WARNING,
+				(errmsg("could not create file \"%s\": %m", PROMOTE_SIGNAL_FILE)));
+		PG_RETURN_BOOL(false);
+	}
+
+	if (FreeFile(promote_file))
+	{
+		/* probably unreachable, but it is better to be safe */
+		ereport(WARNING,
+				(errmsg("could not write to file \"%s\": %m", PROMOTE_SIGNAL_FILE)));
+		PG_RETURN_BOOL(false);
+	}
+
+	/* signal the postmaster */
+	if (kill(PostmasterPid, SIGUSR1) != 0)
+	{
+		ereport(WARNING,
+				(errmsg("failed to send signal to postmaster: %m")));
+		(void) unlink(PROMOTE_SIGNAL_FILE);
+		PG_RETURN_BOOL(false);
+	}
+
+	/* return immediately if waiting was not requested */
+	if (wait)
+		PG_RETURN_BOOL(true);
+
+	/* wait for up to a minute for promotion */
+	for (i = 0; i < WAITS_PER_SECOND * WAIT_SECONDS; ++i)
+	{
+		if (!RecoveryInProgress())
+			PG_RETURN_BOOL(true);
+
+		pg_usleep(1000000L / WAITS_PER_SECOND);
+	}
+
+	ereport(WARNING,
+			(errmsg("server did not promote within %d seconds", WAIT_SECONDS)));
+	PG_RETURN_BOOL(false);
+}
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index 7251552419..9c9cf84c10 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -1119,6 +1119,13 @@ LANGUAGE INTERNAL
 STRICT IMMUTABLE PARALLEL SAFE
 AS 'jsonb_insert';
 
+CREATE OR REPLACE FUNCTION
+  pg_promote(wait boolean DEFAULT true)
+RETURNS boolean
+LANGUAGE INTERNAL
+STRICT VOLATILE
+AS 'pg_promote';
+
 --
 -- The default permissions for functions mean that anyone can execute them.
 -- A number of functions shouldn't be executable by just anyone, but rather
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 421ba6d775..e01d12eb7c 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -319,10 +319,16 @@ extern void do_pg_abort_backup(void);
 extern SessionBackupState get_backup_status(void);
 
 /* File path names (all relative to $PGDATA) */
+#define RECOVERY_COMMAND_FILE	"recovery.conf"
+#define RECOVERY_COMMAND_DONE	"recovery.done"
 #define BACKUP_LABEL_FILE		"backup_label"
 #define BACKUP_LABEL_OLD		"backup_label.old"
 
 #define TABLESPACE_MAP			"tablespace_map"
 #define TABLESPACE_MAP_OLD		"tablespace_map.old"
 
+/* files to signal promotion to primary */
+#define PROMOTE_SIGNAL_FILE		"promote"
+#define FALLBACK_PROMOTE_SIGNAL_FILE  "fallback_promote"
+
 #endif							/* XLOG_H */
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 8e4145f42b..1598f930a4 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -5997,6 +5997,10 @@
   proname => 'pg_backup_start_time', provolatile => 's',
   prorettype => 'timestamptz', proargtypes => '',
   prosrc => 'pg_backup_start_time' },
+{ oid => '3436', descr => 'promote standby server',
+  proname => 'pg_promote', provolatile => 'v',
+  prorettype => 'bool', proargtypes => 'bool', proargnames => '{wait}',
+  prosrc => 'pg_promote' },
 { oid => '2848', descr => 'switch to new wal file',
   proname => 'pg_switch_wal', provolatile => 'v', prorettype => 'pg_lsn',
   proargtypes => '', prosrc => 'pg_switch_wal' },
diff --git a/src/test/recovery/t/004_timeline_switch.pl b/src/test/recovery/t/004_timeline_switch.pl
index 34ee335129..fd440ca9dd 100644
--- a/src/test/recovery/t/004_timeline_switch.pl
+++ b/src/test/recovery/t/004_timeline_switch.pl
@@ -37,9 +37,11 @@ $node_master->safe_psql('postgres',
 $node_master->wait_for_catchup($node_standby_1, 'replay',
 	$node_master->lsn('write'));
 
-# Stop and remove master, and promote standby 1, switching it to a new timeline
+# Stop and remove master
 $node_master->teardown_node;
-$node_standby_1->promote;
+
+# promote standby 1 using "pg_promote", switching it to a new timeline
+$node_standby_1->safe_psql('postgres', "SELECT pg_promote()");
 
 # Switch standby 2 to replay from standby 1
 rmtree($node_standby_2->data_dir . '/recovery.conf');
diff --git a/src/test/recovery/t/009_twophase.pl b/src/test/recovery/t/009_twophase.pl
index 9ea3bd65fc..a6044179bc 100644
--- a/src/test/recovery/t/009_twophase.pl
+++ b/src/test/recovery/t/009_twophase.pl
@@ -214,7 +214,11 @@ $cur_master->psql(
 	INSERT INTO t_009_tbl VALUES (22, 'issued to ${cur_master_name}');
 	PREPARE TRANSACTION 'xact_009_10';");
 $cur_master->teardown_node;
-$cur_standby->promote;
+
+# promote standby using "pg_promote" and wait until it is promoted
+$cur_standby->safe_psql('postgres', 'SELECT pg_promote(FALSE)');
+$cur_standby->poll_query_until('postgres', "SELECT NOT pg_is_in_recovery()")
+	or die "standby never exited recovery";
 
 # change roles
 note "Now paris is master and london is standby";
-- 
2.17.2

Reply via email to