> The InitPostgres() case occurs when the server is started in bootstrap > mode (during initdb) or in single-user mode (postgres --single). I do > not see any reason why we shouldn't produce progress messages in at > least the latter case. I suspect that someone who is in the rather > desperate scenario of having to use single-user mode would really like > to know how long the server is going to take to start up.
Thanks for sharing the information. I have done the necessary changes to show the logs during the latter case (postgres --single) and verified the log messages. > +1 to emit the same log messages in single-user mode and basically > whoever calls StartupXLOG. Do we need to adjust the GUC parameter > log_startup_progress_interval(a reasonable value) so that the logs are > generated by default? At present, this feature is enabled by default and the initial value set for log_startup_progress_interval is 10 seconds. On Wed, Jul 28, 2021 at 9:07 PM Robert Haas <robertmh...@gmail.com> wrote: > > On Wed, Jul 28, 2021 at 11:25 AM Bharath Rupireddy > <bharath.rupireddyforpostg...@gmail.com> wrote: > > > Perhaps during initdb we don't want messages, but I'm not sure that we > > > need to do anything about that here. None of the messages that the > > > server normally produces show up when you run initdb, so I guess they > > > are getting redirected to /dev/null or something. > > > > I enabled the below log message in XLogFlush and ran initdb, it is > > printing these logs onto the stdout, looks like the logs have not been > > redirected to /dev/null in initdb/bootstrap mode. > > > > #ifdef WAL_DEBUG > > if (XLOG_DEBUG) > > elog(LOG, "xlog flush request %X/%X; write %X/%X; flush %X/%X", > > LSN_FORMAT_ARGS(record), > > LSN_FORMAT_ARGS(LogwrtResult.Write), > > LSN_FORMAT_ARGS(LogwrtResult.Flush)); > > #endif > > > > So, even in bootstrap mode, can we use the above #ifdef WAL_DEBUG and > > XLOG_DEBUG to print those logs? It looks like the problem with these > > macros is that they are not settable by normal users in the production > > environment, but only by the developers. I'm not sure how much it is > > helpful to print the startup process progress logs in bootstrap mode. > > Maybe we can use the IsBootstrapProcessingMode macro to disable these > > logs in the bootstrap mode. > > I don't think we should drag XLOG_DEBUG into this. That's a debugging > facility that isn't really relevant to the topic at hand. The point is > the server normally prints a bunch of messages that we don't see in > bootstrap mode. For example: > > [rhaas pgsql]$ postgres > 2021-07-28 11:32:33.824 EDT [36801] LOG: starting PostgreSQL 15devel > on x86_64-apple-darwin19.6.0, compiled by clang version 5.0.2 > (tags/RELEASE_502/final), 64-bit > 2021-07-28 11:32:33.825 EDT [36801] LOG: listening on IPv6 address > "::1", port 5432 > 2021-07-28 11:32:33.825 EDT [36801] LOG: listening on IPv4 address > "127.0.0.1", port 5432 > 2021-07-28 11:32:33.826 EDT [36801] LOG: listening on Unix socket > "/tmp/.s.PGSQL.5432" > 2021-07-28 11:32:33.846 EDT [36802] LOG: database system was shut > down at 2021-07-28 11:32:32 EDT > 2021-07-28 11:32:33.857 EDT [36801] LOG: database system is ready to > accept connections > > None of that shows up when you run initdb. Taking a fast look at the > code, I don't think the reasons are the same for all of those > messages. Some of the code isn't reached, whereas e.g. "database > system was shut down at 2021-07-28 11:32:32 EDT" is special-cased. I'm > not sure right off the top of my head what this code should do, but > ideally it looks something like one of the cases we've already got. > > -- > Robert Haas > EDB: http://www.enterprisedb.com
From 55c73bb0920e3487b72d2588c43c1c5e8798e6d4 Mon Sep 17 00:00:00 2001 From: Nitin <nitin.jadhav@enterprisedb.com> Date: Thu, 29 Jul 2021 14:11:24 +0530 Subject: [PATCH] Shows the progress of the operations performed during startup process. The interval between each progress update is configurable and it should be mentioned in seconds --- src/backend/access/transam/xlog.c | 163 ++++++++++++++++++++++++++ src/backend/postmaster/startup.c | 1 + src/backend/storage/file/fd.c | 13 ++ src/backend/storage/file/reinit.c | 17 +++ src/backend/utils/init/postinit.c | 1 + src/backend/utils/misc/guc.c | 12 ++ src/backend/utils/misc/postgresql.conf.sample | 6 + src/include/access/xlog.h | 18 +++ src/include/utils/timeout.h | 1 + 9 files changed, 232 insertions(+) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 3479402..5a39da7 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -79,6 +79,7 @@ #include "utils/relmapper.h" #include "utils/pg_rusage.h" #include "utils/snapmgr.h" +#include "utils/timeout.h" #include "utils/timestamp.h" extern uint32 bootstrap_data_checksum_version; @@ -397,6 +398,23 @@ static bool doRequestWalReceiverReply; */ static XLogRecPtr RedoStartLSN = InvalidXLogRecPtr; +/* + * Start timestamp of the operation that occur during startup process. + */ +static TimestampTz startupProcessOpStartTime; + +/* + * Indicates whether the startup progress interval mentioned by the user is + * elapsed or not. TRUE if timeout occurred, FALSE otherwise. + */ +static bool logStartupProgressTimeout; + +/* + * The interval between each progress update of the operations that occur + * during startup process. + */ +int log_startup_progress_interval; + /*---------- * Shared-memory data structures for XLOG control * @@ -7350,6 +7368,7 @@ StartupXLOG(void) ereport(LOG, (errmsg("redo starts at %X/%X", LSN_FORMAT_ARGS(ReadRecPtr)))); + InitStartupProgress(); /* * main redo apply loop @@ -7358,6 +7377,8 @@ StartupXLOG(void) { bool switchedTLI = false; + LogStartupProgress(STARTUP_PROCESS_OP_REDO, NULL); + #ifdef WAL_DEBUG if (XLOG_DEBUG || (rmid == RM_XACT_ID && trace_recovery_messages <= DEBUG2) || @@ -13021,3 +13042,145 @@ XLogRequestWalReceiverReply(void) { doRequestWalReceiverReply = true; } + +/* + * Schedule a wakeup call for logging the progress of startup process. + */ +void +LogStartupProgressTimeoutHandler(void) +{ + logStartupProgressTimeout = true; +} + +/* + * Sets the start timestamp of the current operation and also enables the + * timeout for logging the progress of startup process. + */ +void +InitStartupProgress(void) +{ + startupProcessOpStartTime = GetCurrentTimestamp(); + enable_timeout_after(LOG_STARTUP_PROGRESS_TIMEOUT, + (log_startup_progress_interval * 1000)); +} + +/* + * Logs the progress of the operations performed during startup process. + */ +void +LogStartupProgress(StartupProcessOp operation, const char *path) +{ + long secs; + int usecs; + int elapsed_ms; + int interval_in_ms; + + /* If the feature is disabled, then no need to proceed further. */ + if (log_startup_progress_interval < 0) + return; + + /* If the timeout has not occurred, then no need to log the details. */ + if (!logStartupProgressTimeout) + return; + + /* Timeout has occurred. Calculate the elapsed time. */ + TimestampDifference(startupProcessOpStartTime, + GetCurrentTimestamp(), + &secs, &usecs); + + /* + * Enable the timer for the next log message based on the time that current + * log message timer was supposed to fire. + */ + elapsed_ms = (secs * 1000) + (usecs / 1000); + interval_in_ms = log_startup_progress_interval * 1000; + enable_timeout_after(LOG_STARTUP_PROGRESS_TIMEOUT, + (interval_in_ms - (elapsed_ms % interval_in_ms))); + + switch(operation) + { + case STARTUP_PROCESS_OP_SYNCFS: + ereport(LOG, + (errmsg("syncing data directory (syncfs), elapsed time: %ld.%02d s, current path: %s", + secs, (usecs / 10000), path))); + break; + case STARTUP_PROCESS_OP_FSYNC: + ereport(LOG, + (errmsg("syncing data directory (fsync), elapsed time: %ld.%02d s, current path: %s", + secs, (usecs / 10000), path))); + break; + case STARTUP_PROCESS_OP_REDO: + ereport(LOG, + (errmsg("redo in progress, elapsed time: %ld.%02d s, current LSN: %X/%X", + secs, (usecs / 10000), LSN_FORMAT_ARGS(ReadRecPtr)))); + break; + case STARTUP_PROCESS_OP_RESET_UNLOGGED_REL_INIT: + ereport(LOG, + (errmsg("resetting unlogged relations (init), elapsed time: %ld.%02d s, current path: %s", + secs, (usecs / 10000), path))); + break; + case STARTUP_PROCESS_OP_RESET_UNLOGGED_REL_CLEANUP: + ereport(LOG, + (errmsg("resetting unlogged relations (cleanup), elapsed time: %ld.%02d s, current path: %s", + secs, (usecs / 10000), path))); + break; + default: + ereport(ERROR, + (errmsg("unrecognized operation (%d) in startup progress update", + operation))); + break; + } + + logStartupProgressTimeout = false; +} + +/* + * Logs the completion of the operation performed during startup process and + * disables the timeout used for logging the progress. + */ +void +LogStartupProgressComplete(StartupProcessOp operation) +{ + long secs; + int usecs; + + /* If the feature is disabled, then no need to proceed further. */ + if (log_startup_progress_interval < 0) + return; + + /* Calculate the elapsed time. */ + TimestampDifference(startupProcessOpStartTime, + GetCurrentTimestamp(), + &secs, &usecs); + + switch(operation) + { + case STARTUP_PROCESS_OP_SYNCFS: + ereport(LOG, + (errmsg("data directory sync (syncfs) complete after %ld.%02d s", + secs, (usecs / 10000)))); + break; + case STARTUP_PROCESS_OP_FSYNC: + ereport(LOG, + (errmsg("data directory sync (fsync) complete after %ld.%02d s", + secs, (usecs / 10000)))); + break; + case STARTUP_PROCESS_OP_RESET_UNLOGGED_REL_INIT: + ereport(LOG, + (errmsg("resetting unlogged relations (init) complete after %ld.%02d s", + secs, (usecs / 10000)))); + break; + case STARTUP_PROCESS_OP_RESET_UNLOGGED_REL_CLEANUP: + ereport(LOG, + (errmsg("resetting unlogged relations (cleanup) complete after %ld.%02d s", + secs, (usecs / 10000)))); + break; + default: + ereport(ERROR, + (errmsg("unrecognized operation (%d) in startup progress update", + operation))); + break; + } + + disable_timeout(LOG_STARTUP_PROGRESS_TIMEOUT, false); +} diff --git a/src/backend/postmaster/startup.c b/src/backend/postmaster/startup.c index 69077bd..08c271c 100644 --- a/src/backend/postmaster/startup.c +++ b/src/backend/postmaster/startup.c @@ -232,6 +232,7 @@ StartupProcessMain(void) RegisterTimeout(STANDBY_DEADLOCK_TIMEOUT, StandbyDeadLockHandler); RegisterTimeout(STANDBY_TIMEOUT, StandbyTimeoutHandler); RegisterTimeout(STANDBY_LOCK_TIMEOUT, StandbyLockTimeoutHandler); + RegisterTimeout(LOG_STARTUP_PROGRESS_TIMEOUT, LogStartupProgressTimeoutHandler); /* * Unblock signals (they were blocked when the postmaster forked us) diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c index 5d5e8ae..87f54fe 100644 --- a/src/backend/storage/file/fd.c +++ b/src/backend/storage/file/fd.c @@ -100,6 +100,7 @@ #include "storage/ipc.h" #include "utils/guc.h" #include "utils/resowner_private.h" +#include "utils/timeout.h" /* Define PG_FLUSH_DATA_WORKS if we have an implementation for pg_flush_data */ #if defined(HAVE_SYNC_FILE_RANGE) @@ -3334,6 +3335,8 @@ do_syncfs(const char *path) { int fd; + LogStartupProgress(STARTUP_PROCESS_OP_SYNCFS, path); + fd = OpenTransientFile(path, O_RDONLY); if (fd < 0) { @@ -3410,6 +3413,8 @@ SyncDataDirectory(void) DIR *dir; struct dirent *de; + InitStartupProgress(); + /* * On Linux, we don't have to open every single file one by one. We * can use syncfs() to sync whole filesystems. We only expect @@ -3436,6 +3441,8 @@ SyncDataDirectory(void) /* If pg_wal is a symlink, process that too. */ if (xlog_is_symlink) do_syncfs("pg_wal"); + + LogStartupProgressComplete(STARTUP_PROCESS_OP_SYNCFS); return; } #endif /* !HAVE_SYNCFS */ @@ -3452,6 +3459,8 @@ SyncDataDirectory(void) walkdir("pg_tblspc", pre_sync_fname, true, DEBUG1); #endif + InitStartupProgress(); + /* * Now we do the fsync()s in the same order. * @@ -3465,6 +3474,8 @@ SyncDataDirectory(void) if (xlog_is_symlink) walkdir("pg_wal", datadir_fsync_fname, false, LOG); walkdir("pg_tblspc", datadir_fsync_fname, true, LOG); + + LogStartupProgressComplete(STARTUP_PROCESS_OP_FSYNC); } /* @@ -3583,6 +3594,8 @@ pre_sync_fname(const char *fname, bool isdir, int elevel) static void datadir_fsync_fname(const char *fname, bool isdir, int elevel) { + LogStartupProgress(STARTUP_PROCESS_OP_FSYNC, fname); + /* * We want to silently ignoring errors about unreadable files. Pass that * desire on to fsync_fname_ext(). diff --git a/src/backend/storage/file/reinit.c b/src/backend/storage/file/reinit.c index 40c758d..5744d07 100644 --- a/src/backend/storage/file/reinit.c +++ b/src/backend/storage/file/reinit.c @@ -16,12 +16,14 @@ #include <unistd.h> +#include "access/xlog.h" #include "common/relpath.h" #include "storage/copydir.h" #include "storage/fd.h" #include "storage/reinit.h" #include "utils/hsearch.h" #include "utils/memutils.h" +#include "utils/timeout.h" static void ResetUnloggedRelationsInTablespaceDir(const char *tsdirname, int op); @@ -65,6 +67,8 @@ ResetUnloggedRelations(int op) ALLOCSET_DEFAULT_SIZES); oldctx = MemoryContextSwitchTo(tmpctx); + InitStartupProgress(); + /* * First process unlogged files in pg_default ($PGDATA/base) */ @@ -86,6 +90,11 @@ ResetUnloggedRelations(int op) ResetUnloggedRelationsInTablespaceDir(temp_path, op); } + if (op & UNLOGGED_RELATION_INIT) + LogStartupProgressComplete(STARTUP_PROCESS_OP_RESET_UNLOGGED_REL_INIT); + else if (op & UNLOGGED_RELATION_CLEANUP) + LogStartupProgressComplete(STARTUP_PROCESS_OP_RESET_UNLOGGED_REL_CLEANUP); + FreeDir(spc_dir); /* @@ -136,6 +145,14 @@ ResetUnloggedRelationsInTablespaceDir(const char *tsdirname, int op) snprintf(dbspace_path, sizeof(dbspace_path), "%s/%s", tsdirname, de->d_name); + + if (op & UNLOGGED_RELATION_INIT) + LogStartupProgress(STARTUP_PROCESS_OP_RESET_UNLOGGED_REL_INIT, + dbspace_path); + else if (op & UNLOGGED_RELATION_CLEANUP) + LogStartupProgress(STARTUP_PROCESS_OP_RESET_UNLOGGED_REL_CLEANUP, + dbspace_path); + ResetUnloggedRelationsInDbspaceDir(dbspace_path, op); } diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c index 51d1bbe..6b1dd4b 100644 --- a/src/backend/utils/init/postinit.c +++ b/src/backend/utils/init/postinit.c @@ -622,6 +622,7 @@ InitPostgres(const char *in_dbname, Oid dboid, const char *username, IdleInTransactionSessionTimeoutHandler); RegisterTimeout(IDLE_SESSION_TIMEOUT, IdleSessionTimeoutHandler); RegisterTimeout(CLIENT_CONNECTION_CHECK_TIMEOUT, ClientCheckTimeoutHandler); + RegisterTimeout(LOG_STARTUP_PROGRESS_TIMEOUT, LogStartupProgressTimeoutHandler); } /* diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index a2e0f8d..82bbc88 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -3546,6 +3546,18 @@ static struct config_int ConfigureNamesInt[] = check_client_connection_check_interval, NULL, NULL }, + { + {"log_startup_progress_interval", PGC_SIGHUP, LOGGING_WHEN, + gettext_noop("Sets the time interval between each progress update " + "of the startup process."), + gettext_noop("0 logs all messages. -1 turns this feature off."), + GUC_UNIT_S, + }, + &log_startup_progress_interval, + -1, -1, INT_MAX, + NULL, NULL, NULL + }, + /* End-of-list marker */ { {NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index b242a7f..901e4ba 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -523,6 +523,12 @@ # are logged regardless of their duration; 1.0 logs all # statements from all transactions, 0.0 never logs +log_startup_progress_interval = 10 # Sets the time interval between each + # progress update of the startup process. + # -1 disables the feature, 0 logs all + # messages, > 0 indicates the interval in + # seconds. + # - What to Log - #debug_print_parse = off diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index ccfcf43..f17d91b 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -132,6 +132,7 @@ extern char *PrimaryConnInfo; extern char *PrimarySlotName; extern bool wal_receiver_create_temp_slot; extern bool track_wal_io_timing; +extern int log_startup_progress_interval; /* indirectly set via GUC system */ extern TransactionId recoveryTargetXid; @@ -295,6 +296,18 @@ typedef enum WALAvailability WALAVAIL_REMOVED /* WAL segment has been removed */ } WALAvailability; +/* + * Codes of the operations performed during startup process + */ +typedef enum StartupProcessOp +{ + STARTUP_PROCESS_OP_SYNCFS, + STARTUP_PROCESS_OP_FSYNC, + STARTUP_PROCESS_OP_REDO, + STARTUP_PROCESS_OP_RESET_UNLOGGED_REL_INIT, + STARTUP_PROCESS_OP_RESET_UNLOGGED_REL_CLEANUP +} StartupProcessOp; + struct XLogRecData; extern XLogRecPtr XLogInsertRecord(struct XLogRecData *rdata, @@ -399,6 +412,11 @@ extern void do_pg_abort_backup(int code, Datum arg); extern void register_persistent_abort_backup_handler(void); extern SessionBackupState get_backup_status(void); +extern void InitStartupProgress(void); +extern void LogStartupProgress(StartupProcessOp operation, const char *path); +extern void LogStartupProgressComplete(StartupProcessOp operation); +extern void LogStartupProgressTimeoutHandler(void); + /* File path names (all relative to $PGDATA) */ #define RECOVERY_SIGNAL_FILE "recovery.signal" #define STANDBY_SIGNAL_FILE "standby.signal" diff --git a/src/include/utils/timeout.h b/src/include/utils/timeout.h index 93e6a69..ef892dd 100644 --- a/src/include/utils/timeout.h +++ b/src/include/utils/timeout.h @@ -33,6 +33,7 @@ typedef enum TimeoutId IDLE_IN_TRANSACTION_SESSION_TIMEOUT, IDLE_SESSION_TIMEOUT, CLIENT_CONNECTION_CHECK_TIMEOUT, + LOG_STARTUP_PROGRESS_TIMEOUT, /* First user-definable timeout reason */ USER_TIMEOUT, /* Maximum number of timeout reasons */ -- 1.8.3.1