> The InitPostgres() case occurs when the server is started in bootstrap
> mode (during initdb) or in single-user mode (postgres --single). I do
> not see any reason why we shouldn't produce progress messages in at
> least the latter case. I suspect that someone who is in the rather
> desperate scenario of having to use single-user mode would really like
> to know how long the server is going to take to start up.

Thanks for sharing the information. I have done the necessary changes
to show the logs during the latter case (postgres --single) and
verified the log messages.

> +1 to emit the same log messages in single-user mode and basically
> whoever calls StartupXLOG. Do we need to adjust the GUC parameter
> log_startup_progress_interval(a reasonable value) so that the logs are
> generated by default?

At present, this feature is enabled by default and the initial value
set for log_startup_progress_interval is 10 seconds.


On Wed, Jul 28, 2021 at 9:07 PM Robert Haas <robertmh...@gmail.com> wrote:
>
> On Wed, Jul 28, 2021 at 11:25 AM Bharath Rupireddy
> <bharath.rupireddyforpostg...@gmail.com> wrote:
> > > Perhaps during initdb we don't want messages, but I'm not sure that we
> > > need to do anything about that here. None of the messages that the
> > > server normally produces show up when you run initdb, so I guess they
> > > are getting redirected to /dev/null or something.
> >
> > I enabled the below log message in XLogFlush and ran initdb, it is
> > printing these logs onto the stdout, looks like the logs have not been
> > redirected to /dev/null in initdb/bootstrap mode.
> >
> > #ifdef WAL_DEBUG
> > if (XLOG_DEBUG)
> > elog(LOG, "xlog flush request %X/%X; write %X/%X; flush %X/%X",
> > LSN_FORMAT_ARGS(record),
> > LSN_FORMAT_ARGS(LogwrtResult.Write),
> > LSN_FORMAT_ARGS(LogwrtResult.Flush));
> > #endif
> >
> > So, even in bootstrap mode, can we use the above #ifdef WAL_DEBUG and
> > XLOG_DEBUG to print those logs? It looks like the problem with these
> > macros is that they are not settable by normal users in the production
> > environment, but only by the developers. I'm not sure how much it is
> > helpful to print the startup process progress logs in bootstrap mode.
> > Maybe we can use the IsBootstrapProcessingMode macro to disable these
> > logs in the bootstrap mode.
>
> I don't think we should drag XLOG_DEBUG into this. That's a debugging
> facility that isn't really relevant to the topic at hand. The point is
> the server normally prints a bunch of messages that we don't see in
> bootstrap mode. For example:
>
> [rhaas pgsql]$ postgres
> 2021-07-28 11:32:33.824 EDT [36801] LOG:  starting PostgreSQL 15devel
> on x86_64-apple-darwin19.6.0, compiled by clang version 5.0.2
> (tags/RELEASE_502/final), 64-bit
> 2021-07-28 11:32:33.825 EDT [36801] LOG:  listening on IPv6 address
> "::1", port 5432
> 2021-07-28 11:32:33.825 EDT [36801] LOG:  listening on IPv4 address
> "127.0.0.1", port 5432
> 2021-07-28 11:32:33.826 EDT [36801] LOG:  listening on Unix socket
> "/tmp/.s.PGSQL.5432"
> 2021-07-28 11:32:33.846 EDT [36802] LOG:  database system was shut
> down at 2021-07-28 11:32:32 EDT
> 2021-07-28 11:32:33.857 EDT [36801] LOG:  database system is ready to
> accept connections
>
> None of that shows up when you run initdb. Taking a fast look at the
> code, I don't think the reasons are the same for all of those
> messages. Some of the code isn't reached, whereas e.g. "database
> system was shut down at 2021-07-28 11:32:32 EDT" is special-cased. I'm
> not sure right off the top of my head what this code should do, but
> ideally it looks something like one of the cases we've already got.
>
> --
> Robert Haas
> EDB: http://www.enterprisedb.com
From 55c73bb0920e3487b72d2588c43c1c5e8798e6d4 Mon Sep 17 00:00:00 2001
From: Nitin <nitin.jadhav@enterprisedb.com>
Date: Thu, 29 Jul 2021 14:11:24 +0530
Subject: [PATCH] Shows the progress of the operations performed during startup
 process. The interval between each progress update is configurable and it
 should be mentioned in seconds

---
 src/backend/access/transam/xlog.c             | 163 ++++++++++++++++++++++++++
 src/backend/postmaster/startup.c              |   1 +
 src/backend/storage/file/fd.c                 |  13 ++
 src/backend/storage/file/reinit.c             |  17 +++
 src/backend/utils/init/postinit.c             |   1 +
 src/backend/utils/misc/guc.c                  |  12 ++
 src/backend/utils/misc/postgresql.conf.sample |   6 +
 src/include/access/xlog.h                     |  18 +++
 src/include/utils/timeout.h                   |   1 +
 9 files changed, 232 insertions(+)

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 3479402..5a39da7 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -79,6 +79,7 @@
 #include "utils/relmapper.h"
 #include "utils/pg_rusage.h"
 #include "utils/snapmgr.h"
+#include "utils/timeout.h"
 #include "utils/timestamp.h"
 
 extern uint32 bootstrap_data_checksum_version;
@@ -397,6 +398,23 @@ static bool doRequestWalReceiverReply;
  */
 static XLogRecPtr RedoStartLSN = InvalidXLogRecPtr;
 
+/*
+ * Start timestamp of the operation that occur during startup process.
+ */
+static TimestampTz startupProcessOpStartTime;
+
+/*
+ * Indicates whether the startup progress interval mentioned by the user is
+ * elapsed or not. TRUE if timeout occurred, FALSE otherwise.
+ */
+static bool logStartupProgressTimeout;
+
+/*
+ * The interval between each progress update of the operations that occur
+ * during startup process.
+ */
+int	log_startup_progress_interval;
+
 /*----------
  * Shared-memory data structures for XLOG control
  *
@@ -7350,6 +7368,7 @@ StartupXLOG(void)
 			ereport(LOG,
 					(errmsg("redo starts at %X/%X",
 							LSN_FORMAT_ARGS(ReadRecPtr))));
+			InitStartupProgress();
 
 			/*
 			 * main redo apply loop
@@ -7358,6 +7377,8 @@ StartupXLOG(void)
 			{
 				bool		switchedTLI = false;
 
+				LogStartupProgress(STARTUP_PROCESS_OP_REDO, NULL);
+
 #ifdef WAL_DEBUG
 				if (XLOG_DEBUG ||
 					(rmid == RM_XACT_ID && trace_recovery_messages <= DEBUG2) ||
@@ -13021,3 +13042,145 @@ XLogRequestWalReceiverReply(void)
 {
 	doRequestWalReceiverReply = true;
 }
+
+/*
+ * Schedule a wakeup call for logging the progress of startup process.
+ */
+void
+LogStartupProgressTimeoutHandler(void)
+{
+	logStartupProgressTimeout = true;
+}
+
+/*
+ * Sets the start timestamp of the current operation and also enables the
+ * timeout for logging the progress of startup process.
+ */
+void
+InitStartupProgress(void)
+{
+	startupProcessOpStartTime = GetCurrentTimestamp();
+	enable_timeout_after(LOG_STARTUP_PROGRESS_TIMEOUT,
+						 (log_startup_progress_interval * 1000));
+}
+
+/*
+ * Logs the progress of the operations performed during startup process.
+ */
+void
+LogStartupProgress(StartupProcessOp operation, const char *path)
+{
+	long        secs;
+	int         usecs;
+	int			elapsed_ms;
+	int			interval_in_ms;
+
+	/* If the feature is disabled, then no need to proceed further. */
+	if (log_startup_progress_interval < 0)
+		return;
+
+	/* If the timeout has not occurred, then no need to log the details. */
+	if (!logStartupProgressTimeout)
+		return;
+
+	/* Timeout has occurred. Calculate the elapsed time. */
+	TimestampDifference(startupProcessOpStartTime,
+						GetCurrentTimestamp(),
+						&secs, &usecs);
+
+	/*
+	 * Enable the timer for the next log message based on the time that current
+	 * log message timer was supposed to fire.
+	 */
+	elapsed_ms = (secs * 1000) + (usecs / 1000);
+	interval_in_ms = log_startup_progress_interval * 1000;
+	enable_timeout_after(LOG_STARTUP_PROGRESS_TIMEOUT,
+						 (interval_in_ms - (elapsed_ms % interval_in_ms)));
+
+	switch(operation)
+	{
+		case STARTUP_PROCESS_OP_SYNCFS:
+			ereport(LOG,
+					(errmsg("syncing data directory (syncfs), elapsed time: %ld.%02d s, current path: %s",
+							secs, (usecs / 10000), path)));
+			break;
+		case STARTUP_PROCESS_OP_FSYNC:
+			ereport(LOG,
+					(errmsg("syncing data directory (fsync), elapsed time: %ld.%02d s, current path: %s",
+							secs, (usecs / 10000), path)));
+			break;
+		case STARTUP_PROCESS_OP_REDO:
+			ereport(LOG,
+					(errmsg("redo in progress, elapsed time: %ld.%02d s, current LSN: %X/%X",
+							secs, (usecs / 10000), LSN_FORMAT_ARGS(ReadRecPtr))));
+			break;
+		case STARTUP_PROCESS_OP_RESET_UNLOGGED_REL_INIT:
+			ereport(LOG,
+					(errmsg("resetting unlogged relations (init), elapsed time: %ld.%02d s, current path: %s",
+							secs, (usecs / 10000), path)));
+			break;
+		case STARTUP_PROCESS_OP_RESET_UNLOGGED_REL_CLEANUP:
+			ereport(LOG,
+					(errmsg("resetting unlogged relations (cleanup), elapsed time: %ld.%02d s, current path: %s",
+							secs, (usecs / 10000), path)));
+			break;
+		default:
+			ereport(ERROR,
+					(errmsg("unrecognized operation (%d) in startup progress update",
+							operation)));
+			break;
+	}
+
+	logStartupProgressTimeout = false;
+}
+
+/*
+ * Logs the completion of the operation performed during startup process and
+ * disables the timeout used for logging the progress.
+ */
+void
+LogStartupProgressComplete(StartupProcessOp operation)
+{
+	long        secs;
+	int         usecs;
+
+	/* If the feature is disabled, then no need to proceed further. */
+	if (log_startup_progress_interval < 0)
+		return;
+
+	/* Calculate the elapsed time. */
+	TimestampDifference(startupProcessOpStartTime,
+						GetCurrentTimestamp(),
+						&secs, &usecs);
+
+	switch(operation)
+	{
+		case STARTUP_PROCESS_OP_SYNCFS:
+			ereport(LOG,
+					(errmsg("data directory sync (syncfs) complete after %ld.%02d s",
+							secs, (usecs / 10000))));
+			break;
+		case STARTUP_PROCESS_OP_FSYNC:
+			ereport(LOG,
+					(errmsg("data directory sync (fsync) complete after %ld.%02d s",
+							secs, (usecs / 10000))));
+			break;
+		case STARTUP_PROCESS_OP_RESET_UNLOGGED_REL_INIT:
+			ereport(LOG,
+					(errmsg("resetting unlogged relations (init) complete after %ld.%02d s",
+							secs, (usecs / 10000))));
+			break;
+		case STARTUP_PROCESS_OP_RESET_UNLOGGED_REL_CLEANUP:
+			ereport(LOG,
+					(errmsg("resetting unlogged relations (cleanup) complete after %ld.%02d s",
+							secs, (usecs / 10000))));
+			break;
+		default:
+			ereport(ERROR,
+					(errmsg("unrecognized operation (%d) in startup progress update",
+							operation)));
+			break;
+	}
+
+	disable_timeout(LOG_STARTUP_PROGRESS_TIMEOUT, false);
+}
diff --git a/src/backend/postmaster/startup.c b/src/backend/postmaster/startup.c
index 69077bd..08c271c 100644
--- a/src/backend/postmaster/startup.c
+++ b/src/backend/postmaster/startup.c
@@ -232,6 +232,7 @@ StartupProcessMain(void)
 	RegisterTimeout(STANDBY_DEADLOCK_TIMEOUT, StandbyDeadLockHandler);
 	RegisterTimeout(STANDBY_TIMEOUT, StandbyTimeoutHandler);
 	RegisterTimeout(STANDBY_LOCK_TIMEOUT, StandbyLockTimeoutHandler);
+	RegisterTimeout(LOG_STARTUP_PROGRESS_TIMEOUT, LogStartupProgressTimeoutHandler);
 
 	/*
 	 * Unblock signals (they were blocked when the postmaster forked us)
diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index 5d5e8ae..87f54fe 100644
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@@ -100,6 +100,7 @@
 #include "storage/ipc.h"
 #include "utils/guc.h"
 #include "utils/resowner_private.h"
+#include "utils/timeout.h"
 
 /* Define PG_FLUSH_DATA_WORKS if we have an implementation for pg_flush_data */
 #if defined(HAVE_SYNC_FILE_RANGE)
@@ -3334,6 +3335,8 @@ do_syncfs(const char *path)
 {
 	int			fd;
 
+	LogStartupProgress(STARTUP_PROCESS_OP_SYNCFS, path);
+
 	fd = OpenTransientFile(path, O_RDONLY);
 	if (fd < 0)
 	{
@@ -3410,6 +3413,8 @@ SyncDataDirectory(void)
 		DIR		   *dir;
 		struct dirent *de;
 
+		InitStartupProgress();
+
 		/*
 		 * On Linux, we don't have to open every single file one by one.  We
 		 * can use syncfs() to sync whole filesystems.  We only expect
@@ -3436,6 +3441,8 @@ SyncDataDirectory(void)
 		/* If pg_wal is a symlink, process that too. */
 		if (xlog_is_symlink)
 			do_syncfs("pg_wal");
+
+		LogStartupProgressComplete(STARTUP_PROCESS_OP_SYNCFS);
 		return;
 	}
 #endif							/* !HAVE_SYNCFS */
@@ -3452,6 +3459,8 @@ SyncDataDirectory(void)
 	walkdir("pg_tblspc", pre_sync_fname, true, DEBUG1);
 #endif
 
+	InitStartupProgress();
+
 	/*
 	 * Now we do the fsync()s in the same order.
 	 *
@@ -3465,6 +3474,8 @@ SyncDataDirectory(void)
 	if (xlog_is_symlink)
 		walkdir("pg_wal", datadir_fsync_fname, false, LOG);
 	walkdir("pg_tblspc", datadir_fsync_fname, true, LOG);
+
+	LogStartupProgressComplete(STARTUP_PROCESS_OP_FSYNC);
 }
 
 /*
@@ -3583,6 +3594,8 @@ pre_sync_fname(const char *fname, bool isdir, int elevel)
 static void
 datadir_fsync_fname(const char *fname, bool isdir, int elevel)
 {
+	LogStartupProgress(STARTUP_PROCESS_OP_FSYNC, fname);
+
 	/*
 	 * We want to silently ignoring errors about unreadable files.  Pass that
 	 * desire on to fsync_fname_ext().
diff --git a/src/backend/storage/file/reinit.c b/src/backend/storage/file/reinit.c
index 40c758d..5744d07 100644
--- a/src/backend/storage/file/reinit.c
+++ b/src/backend/storage/file/reinit.c
@@ -16,12 +16,14 @@
 
 #include <unistd.h>
 
+#include "access/xlog.h"
 #include "common/relpath.h"
 #include "storage/copydir.h"
 #include "storage/fd.h"
 #include "storage/reinit.h"
 #include "utils/hsearch.h"
 #include "utils/memutils.h"
+#include "utils/timeout.h"
 
 static void ResetUnloggedRelationsInTablespaceDir(const char *tsdirname,
 												  int op);
@@ -65,6 +67,8 @@ ResetUnloggedRelations(int op)
 								   ALLOCSET_DEFAULT_SIZES);
 	oldctx = MemoryContextSwitchTo(tmpctx);
 
+	InitStartupProgress();
+
 	/*
 	 * First process unlogged files in pg_default ($PGDATA/base)
 	 */
@@ -86,6 +90,11 @@ ResetUnloggedRelations(int op)
 		ResetUnloggedRelationsInTablespaceDir(temp_path, op);
 	}
 
+	if (op & UNLOGGED_RELATION_INIT)
+		LogStartupProgressComplete(STARTUP_PROCESS_OP_RESET_UNLOGGED_REL_INIT);
+	else if (op & UNLOGGED_RELATION_CLEANUP)
+		LogStartupProgressComplete(STARTUP_PROCESS_OP_RESET_UNLOGGED_REL_CLEANUP);
+
 	FreeDir(spc_dir);
 
 	/*
@@ -136,6 +145,14 @@ ResetUnloggedRelationsInTablespaceDir(const char *tsdirname, int op)
 
 		snprintf(dbspace_path, sizeof(dbspace_path), "%s/%s",
 				 tsdirname, de->d_name);
+
+		if (op & UNLOGGED_RELATION_INIT)
+			LogStartupProgress(STARTUP_PROCESS_OP_RESET_UNLOGGED_REL_INIT,
+							   dbspace_path);
+		else if (op & UNLOGGED_RELATION_CLEANUP)
+			LogStartupProgress(STARTUP_PROCESS_OP_RESET_UNLOGGED_REL_CLEANUP,
+							   dbspace_path);
+
 		ResetUnloggedRelationsInDbspaceDir(dbspace_path, op);
 	}
 
diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c
index 51d1bbe..6b1dd4b 100644
--- a/src/backend/utils/init/postinit.c
+++ b/src/backend/utils/init/postinit.c
@@ -622,6 +622,7 @@ InitPostgres(const char *in_dbname, Oid dboid, const char *username,
 						IdleInTransactionSessionTimeoutHandler);
 		RegisterTimeout(IDLE_SESSION_TIMEOUT, IdleSessionTimeoutHandler);
 		RegisterTimeout(CLIENT_CONNECTION_CHECK_TIMEOUT, ClientCheckTimeoutHandler);
+		RegisterTimeout(LOG_STARTUP_PROGRESS_TIMEOUT, LogStartupProgressTimeoutHandler);
 	}
 
 	/*
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index a2e0f8d..82bbc88 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -3546,6 +3546,18 @@ static struct config_int ConfigureNamesInt[] =
 		check_client_connection_check_interval, NULL, NULL
 	},
 
+	{
+		{"log_startup_progress_interval", PGC_SIGHUP, LOGGING_WHEN,
+			gettext_noop("Sets the time interval between each progress update "
+						 "of the startup process."),
+			gettext_noop("0 logs all messages. -1 turns this feature off."),
+			GUC_UNIT_S,
+		},
+		&log_startup_progress_interval,
+		-1, -1, INT_MAX,
+		NULL, NULL, NULL
+	},
+
 	/* End-of-list marker */
 	{
 		{NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index b242a7f..901e4ba 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -523,6 +523,12 @@
 					# are logged regardless of their duration; 1.0 logs all
 					# statements from all transactions, 0.0 never logs
 
+log_startup_progress_interval = 10	# Sets the time interval between each
+									# progress update of the startup process.
+									# -1 disables the feature, 0 logs all
+									# messages, > 0 indicates the interval in
+									# seconds.
+
 # - What to Log -
 
 #debug_print_parse = off
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index ccfcf43..f17d91b 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -132,6 +132,7 @@ extern char *PrimaryConnInfo;
 extern char *PrimarySlotName;
 extern bool wal_receiver_create_temp_slot;
 extern bool track_wal_io_timing;
+extern int  log_startup_progress_interval;
 
 /* indirectly set via GUC system */
 extern TransactionId recoveryTargetXid;
@@ -295,6 +296,18 @@ typedef enum WALAvailability
 	WALAVAIL_REMOVED			/* WAL segment has been removed */
 } WALAvailability;
 
+/*
+ * Codes of the operations performed during startup process
+ */
+typedef enum StartupProcessOp
+{
+	STARTUP_PROCESS_OP_SYNCFS,
+	STARTUP_PROCESS_OP_FSYNC,
+	STARTUP_PROCESS_OP_REDO,
+	STARTUP_PROCESS_OP_RESET_UNLOGGED_REL_INIT,
+	STARTUP_PROCESS_OP_RESET_UNLOGGED_REL_CLEANUP
+} StartupProcessOp;
+
 struct XLogRecData;
 
 extern XLogRecPtr XLogInsertRecord(struct XLogRecData *rdata,
@@ -399,6 +412,11 @@ extern void do_pg_abort_backup(int code, Datum arg);
 extern void register_persistent_abort_backup_handler(void);
 extern SessionBackupState get_backup_status(void);
 
+extern void InitStartupProgress(void);
+extern void LogStartupProgress(StartupProcessOp operation, const char *path);
+extern void LogStartupProgressComplete(StartupProcessOp operation);
+extern void LogStartupProgressTimeoutHandler(void);
+
 /* File path names (all relative to $PGDATA) */
 #define RECOVERY_SIGNAL_FILE	"recovery.signal"
 #define STANDBY_SIGNAL_FILE		"standby.signal"
diff --git a/src/include/utils/timeout.h b/src/include/utils/timeout.h
index 93e6a69..ef892dd 100644
--- a/src/include/utils/timeout.h
+++ b/src/include/utils/timeout.h
@@ -33,6 +33,7 @@ typedef enum TimeoutId
 	IDLE_IN_TRANSACTION_SESSION_TIMEOUT,
 	IDLE_SESSION_TIMEOUT,
 	CLIENT_CONNECTION_CHECK_TIMEOUT,
+	LOG_STARTUP_PROGRESS_TIMEOUT,
 	/* First user-definable timeout reason */
 	USER_TIMEOUT,
 	/* Maximum number of timeout reasons */
-- 
1.8.3.1

Reply via email to