Re: pg_upgrade should truncate/remove its logs before running

Justin Pryzby Wed, 22 Dec 2021 08:36:40 -0800

On Wed, Dec 22, 2021 at 09:52:26AM -0500, Tom Lane wrote:
> Michael Paquier <[email protected]> writes:
> > On Mon, Dec 20, 2021 at 09:39:26PM -0600, Justin Pryzby wrote:
> >> Are you suggesting to remove these ?
> >> -/pg_upgrade_internal.log
> >> -/loadable_libraries.txt
> 
> > Yep, it looks so as these are part of the logs, the second one being a
> > failure state.
> 
> >> -/reindex_hash.sql
> 
> > But this one is not, no?
> 
> I'd like to get to a state where there's just one thing to "rm -rf"
> to clean up after any pg_upgrade run.  If we continue to leave the
> we-suggest-you-run-these scripts loose in $CWD then we've not really
> improved things much.


My patch moves reindex_hash.sql, and I'm having trouble seeing why it shouldn't
be handled in .gitignore the same way as other stuff that's moved.

But delete-old-cluster.sh is not moved, and I'm not sure how to improve on
that.

> Perhaps there'd be merit in putting log files into an additional
> subdirectory of that output directory, like
> pg_upgrade_output.d/logs/foo.log, so that the more-ignorable
> output files would be separated from the less-ignorable ones.
> Or perhaps that's just gilding the lily.

In the case it's successful, everything is removed - except for the delete
script.  I can see the case for separating the dumps (which are essentially
internal and of which there may be many) and the logs (same), from
the .txt error files like loadable_libraries.txt (which are user-facing).

It could also be divided with each DB having its own subdir, with a dumpfile
and a logfile.

Should the unix socket be created underneath the "output dir" ?

Should it be possible to set the output dir to "." ?  That would give the
pre-existing behavior, but only if we don't use subdirs for log/ and dump/.

>From 8d82297d0e46b48460820ca338781d352093443a Mon Sep 17 00:00:00 2001
From: Justin Pryzby <[email protected]>
Date: Sat, 11 Dec 2021 19:19:50 -0600
Subject: [PATCH] pg_upgrade: write log files and dumps in subdir..

The subdir must not exist when pg_upgrade is started.
This avoids appending to logfiles which have pre-existing errors in them.
And allows more easily cleaning up after pg_upgrade.
---
 doc/src/sgml/ref/pgupgrade.sgml | 15 ++++++-
 src/bin/pg_upgrade/.gitignore   |  3 --
 src/bin/pg_upgrade/check.c      | 14 +++---
 src/bin/pg_upgrade/dump.c       |  6 ++-
 src/bin/pg_upgrade/exec.c       |  5 ++-
 src/bin/pg_upgrade/function.c   |  3 +-
 src/bin/pg_upgrade/option.c     | 31 ++++++++++++--
 src/bin/pg_upgrade/pg_upgrade.c | 76 +++++++++++++++++++++++----------
 src/bin/pg_upgrade/pg_upgrade.h |  1 +
 src/bin/pg_upgrade/server.c     |  6 ++-
 10 files changed, 119 insertions(+), 41 deletions(-)

diff --git a/doc/src/sgml/ref/pgupgrade.sgml b/doc/src/sgml/ref/pgupgrade.sgml
index c5ce732ee98..e6d730a2574 100644
--- a/doc/src/sgml/ref/pgupgrade.sgml
+++ b/doc/src/sgml/ref/pgupgrade.sgml
@@ -130,6 +130,19 @@ PostgreSQL documentation
       cluster</para></listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>-l</option></term>
+      <term><option>--logdir</option></term>
+      <listitem><para>Directory where SQL and log files will be written.
+      The directory will be created and must not exist before calling
+      <command>pg_upgrade</command>.
+      It will be removed after a successful upgrade unless
+      <option>--retain</option> is specified.
+      The default is <literal>pg_upgrade_output.d</literal> in the current
+      working directory.
+      </para></listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>-N</option></term>
       <term><option>--no-sync</option></term>
@@ -768,7 +781,7 @@ psql --username=postgres --file=script.sql postgres
 
   <para>
    <application>pg_upgrade</application> creates various working files, such
-   as schema dumps, in the current working directory.  For security, be sure
+   as schema dumps, below the current working directory.  For security, be sure
    that that directory is not readable or writable by any other users.
   </para>
 
diff --git a/src/bin/pg_upgrade/.gitignore b/src/bin/pg_upgrade/.gitignore
index 2d3bfeaa502..7222f8a6b1f 100644
--- a/src/bin/pg_upgrade/.gitignore
+++ b/src/bin/pg_upgrade/.gitignore
@@ -1,9 +1,6 @@
 /pg_upgrade
 # Generated by test suite
-/pg_upgrade_internal.log
 /delete_old_cluster.sh
 /delete_old_cluster.bat
-/reindex_hash.sql
-/loadable_libraries.txt
 /log/
 /tmp_check/
diff --git a/src/bin/pg_upgrade/check.c b/src/bin/pg_upgrade/check.c
index 1b8ef79242b..d422e580e07 100644
--- a/src/bin/pg_upgrade/check.c
+++ b/src/bin/pg_upgrade/check.c
@@ -552,7 +552,7 @@ create_script_for_old_cluster_deletion(char **deletion_script_file_name)
 
 	prep_status("Creating script to delete old cluster");
 
-	if ((script = fopen_priv(*deletion_script_file_name, "w")) == NULL)
+	if ((script = fopen_priv(*deletion_script_file_name, "w")) == NULL) // OK ?
 		pg_fatal("could not open file \"%s\": %s\n",
 				 *deletion_script_file_name, strerror(errno));
 
@@ -784,7 +784,8 @@ check_for_isn_and_int8_passing_mismatch(ClusterInfo *cluster)
 		return;
 	}
 
-	snprintf(output_path, sizeof(output_path),
+	snprintf(output_path, sizeof(output_path), "%s/%s",
+			 log_opts.basedir,
 			 "contrib_isn_and_int8_pass_by_value.txt");
 
 	for (dbnum = 0; dbnum < cluster->dbarr.ndbs; dbnum++)
@@ -861,7 +862,8 @@ check_for_user_defined_postfix_ops(ClusterInfo *cluster)
 
 	prep_status("Checking for user-defined postfix operators");
 
-	snprintf(output_path, sizeof(output_path),
+	snprintf(output_path, sizeof(output_path), "%s/%s",
+			 log_opts.basedir,
 			 "postfix_ops.txt");
 
 	/* Find any user defined postfix operators */
@@ -960,7 +962,8 @@ check_for_tables_with_oids(ClusterInfo *cluster)
 
 	prep_status("Checking for tables WITH OIDS");
 
-	snprintf(output_path, sizeof(output_path),
+	snprintf(output_path, sizeof(output_path), "%s/%s",
+			 log_opts.basedir,
 			 "tables_with_oids.txt");
 
 	/* Find any tables declared WITH OIDS */
@@ -1215,7 +1218,8 @@ check_for_user_defined_encoding_conversions(ClusterInfo *cluster)
 
 	prep_status("Checking for user-defined encoding conversions");
 
-	snprintf(output_path, sizeof(output_path),
+	snprintf(output_path, sizeof(output_path), "%s/%s",
+			 log_opts.basedir,
 			 "encoding_conversions.txt");
 
 	/* Find any user defined encoding conversions */
diff --git a/src/bin/pg_upgrade/dump.c b/src/bin/pg_upgrade/dump.c
index 90060d0f8eb..d50f71e5136 100644
--- a/src/bin/pg_upgrade/dump.c
+++ b/src/bin/pg_upgrade/dump.c
@@ -22,9 +22,10 @@ generate_old_dump(void)
 	/* run new pg_dumpall binary for globals */
 	exec_prog(UTILITY_LOG_FILE, NULL, true, true,
 			  "\"%s/pg_dumpall\" %s --globals-only --quote-all-identifiers "
-			  "--binary-upgrade %s -f %s",
+			  "--binary-upgrade %s -f %s/dump/%s",
 			  new_cluster.bindir, cluster_conn_opts(&old_cluster),
 			  log_opts.verbose ? "--verbose" : "",
+			  log_opts.basedir,
 			  GLOBALS_DUMP_FILE);
 	check_ok();
 
@@ -52,9 +53,10 @@ generate_old_dump(void)
 
 		parallel_exec_prog(log_file_name, NULL,
 						   "\"%s/pg_dump\" %s --schema-only --quote-all-identifiers "
-						   "--binary-upgrade --format=custom %s --file=\"%s\" %s",
+						   "--binary-upgrade --format=custom %s --file=\"%s/dump/%s\" %s",
 						   new_cluster.bindir, cluster_conn_opts(&old_cluster),
 						   log_opts.verbose ? "--verbose" : "",
+						   log_opts.basedir,
 						   sql_file_name, escaped_connstr.data);
 
 		termPQExpBuffer(&escaped_connstr);
diff --git a/src/bin/pg_upgrade/exec.c b/src/bin/pg_upgrade/exec.c
index 19cc06e0c36..aba639f0ee4 100644
--- a/src/bin/pg_upgrade/exec.c
+++ b/src/bin/pg_upgrade/exec.c
@@ -78,11 +78,12 @@ get_bin_version(ClusterInfo *cluster)
  * The code requires it be called first from the primary thread on Windows.
  */
 bool
-exec_prog(const char *log_file, const char *opt_log_file,
+exec_prog(const char *log_filename, const char *opt_log_file,
 		  bool report_error, bool exit_on_error, const char *fmt,...)
 {
 	int			result = 0;
 	int			written;
+	char		log_file[MAXPGPATH];
 
 #define MAXCMDLEN (2 * MAXPGPATH)
 	char		cmd[MAXCMDLEN];
@@ -97,6 +98,8 @@ exec_prog(const char *log_file, const char *opt_log_file,
 		mainThreadId = GetCurrentThreadId();
 #endif
 
+	snprintf(log_file, MAXPGPATH, "%s/log/%s", log_opts.basedir, log_filename);
+
 	written = 0;
 	va_start(ap, fmt);
 	written += vsnprintf(cmd + written, MAXCMDLEN - written, fmt, ap);
diff --git a/src/bin/pg_upgrade/function.c b/src/bin/pg_upgrade/function.c
index 627ec0ce28b..9e16df32033 100644
--- a/src/bin/pg_upgrade/function.c
+++ b/src/bin/pg_upgrade/function.c
@@ -128,7 +128,8 @@ check_loadable_libraries(void)
 
 	prep_status("Checking for presence of required libraries");
 
-	snprintf(output_path, sizeof(output_path), "loadable_libraries.txt");
+	snprintf(output_path, sizeof(output_path), "%s/%s",
+			log_opts.basedir, "loadable_libraries.txt");
 
 	/*
 	 * Now we want to sort the library names into order.  This avoids multiple
diff --git a/src/bin/pg_upgrade/option.c b/src/bin/pg_upgrade/option.c
index 66fe16964e6..7c86bfc0569 100644
--- a/src/bin/pg_upgrade/option.c
+++ b/src/bin/pg_upgrade/option.c
@@ -57,6 +57,7 @@ parseCommandLine(int argc, char *argv[])
 		{"socketdir", required_argument, NULL, 's'},
 		{"verbose", no_argument, NULL, 'v'},
 		{"clone", no_argument, NULL, 1},
+		{"logdir", required_argument, NULL, 'l'},
 
 		{NULL, 0, NULL, 0}
 	};
@@ -66,6 +67,7 @@ parseCommandLine(int argc, char *argv[])
 	FILE	   *fp;
 	char	  **filename;
 	time_t		run_time = time(NULL);
+	char		filename_path[MAXPGPATH];
 
 	user_opts.do_sync = true;
 	user_opts.transfer_mode = TRANSFER_MODE_COPY;
@@ -136,6 +138,10 @@ parseCommandLine(int argc, char *argv[])
 				user_opts.transfer_mode = TRANSFER_MODE_LINK;
 				break;
 
+			case 'l':
+				log_opts.basedir = pg_strdup(optarg);
+				break;
+
 			case 'N':
 				user_opts.do_sync = false;
 				break;
@@ -208,8 +214,23 @@ parseCommandLine(int argc, char *argv[])
 	if (optind < argc)
 		pg_fatal("too many command-line arguments (first is \"%s\")\n", argv[optind]);
 
-	if ((log_opts.internal = fopen_priv(INTERNAL_LOG_FILE, "a")) == NULL)
-		pg_fatal("could not open log file \"%s\": %m\n", INTERNAL_LOG_FILE);
+	if (log_opts.basedir == NULL)
+		log_opts.basedir = "pg_upgrade_output.d";
+
+	if (mkdir(log_opts.basedir, 00700))
+		pg_fatal("could not create directory \"%s\": %m\n", log_opts.basedir);
+
+	snprintf(filename_path, sizeof(filename_path), "%s/log", log_opts.basedir);
+	if (mkdir(filename_path, 00700))
+		pg_fatal("could not create directory \"%s\": %m\n", filename_path);
+
+	snprintf(filename_path, sizeof(filename_path), "%s/dump", log_opts.basedir);
+	if (mkdir(filename_path, 00700))
+		pg_fatal("could not create directory \"%s\": %m\n", filename_path);
+
+	snprintf(filename_path, sizeof(filename_path), "%s/log/%s", log_opts.basedir, INTERNAL_LOG_FILE);
+	if ((log_opts.internal = fopen_priv(filename_path, "a")) == NULL)
+		pg_fatal("could not open log file \"%s\": %m\n", filename_path);
 
 	if (log_opts.verbose)
 		pg_log(PG_REPORT, "Running in verbose mode\n");
@@ -217,8 +238,10 @@ parseCommandLine(int argc, char *argv[])
 	/* label start of upgrade in logfiles */
 	for (filename = output_files; *filename != NULL; filename++)
 	{
-		if ((fp = fopen_priv(*filename, "a")) == NULL)
-			pg_fatal("could not write to log file \"%s\": %m\n", *filename);
+		snprintf(filename_path, sizeof(filename_path), "%s/log/%s",
+				log_opts.basedir, *filename);
+		if ((fp = fopen_priv(filename_path, "a")) == NULL)
+			pg_fatal("could not write to log file \"%s\": %m\n", filename_path);
 
 		/* Start with newline because we might be appending to a file. */
 		fprintf(fp, "\n"
diff --git a/src/bin/pg_upgrade/pg_upgrade.c b/src/bin/pg_upgrade/pg_upgrade.c
index f85cb2e2620..06e16f0b0b6 100644
--- a/src/bin/pg_upgrade/pg_upgrade.c
+++ b/src/bin/pg_upgrade/pg_upgrade.c
@@ -305,8 +305,9 @@ prepare_new_globals(void)
 	prep_status("Restoring global objects in the new cluster");
 
 	exec_prog(UTILITY_LOG_FILE, NULL, true, true,
-			  "\"%s/psql\" " EXEC_PSQL_ARGS " %s -f \"%s\"",
+			  "\"%s/psql\" " EXEC_PSQL_ARGS " %s -f \"%s/dump/%s\"",
 			  new_cluster.bindir, cluster_conn_opts(&new_cluster),
+			  log_opts.basedir,
 			  GLOBALS_DUMP_FILE);
 	check_ok();
 }
@@ -351,10 +352,11 @@ create_new_objects(void)
 				  true,
 				  true,
 				  "\"%s/pg_restore\" %s %s --exit-on-error --verbose "
-				  "--dbname postgres \"%s\"",
+				  "--dbname postgres \"%s/dump/%s\"",
 				  new_cluster.bindir,
 				  cluster_conn_opts(&new_cluster),
 				  create_opts,
+				  log_opts.basedir,
 				  sql_file_name);
 
 		break;					/* done once we've processed template1 */
@@ -388,10 +390,11 @@ create_new_objects(void)
 		parallel_exec_prog(log_file_name,
 						   NULL,
 						   "\"%s/pg_restore\" %s %s --exit-on-error --verbose "
-						   "--dbname template1 \"%s\"",
+						   "--dbname template1 \"%s/dump/%s\"",
 						   new_cluster.bindir,
 						   cluster_conn_opts(&new_cluster),
 						   create_opts,
+						   log_opts.basedir,
 						   sql_file_name);
 	}
 
@@ -684,32 +687,61 @@ set_frozenxids(bool minmxid_only)
 static void
 cleanup(void)
 {
+	int			dbnum;
+	char	  **filename;
+	char		filename_path[MAXPGPATH];
+
 	fclose(log_opts.internal);
 
 	/* Remove dump and log files? */
-	if (!log_opts.retain)
+	if (log_opts.retain)
+		return;
+
+	for (filename = output_files; *filename != NULL; filename++)
 	{
-		int			dbnum;
-		char	  **filename;
+		snprintf(filename_path, sizeof(filename_path), "%s/log/%s",
+			log_opts.basedir, *filename);
+		if (unlink(filename_path))
+			pg_log(PG_WARNING, "failed to unlink: %s: %m\n",
+			filename_path);
+	}
 
-		for (filename = output_files; *filename != NULL; filename++)
-			unlink(*filename);
+	/* remove dump files */
+	snprintf(filename_path, sizeof(filename_path), "%s/dump/%s",
+		log_opts.basedir, GLOBALS_DUMP_FILE);
+	if (unlink(filename_path))
+		pg_log(PG_WARNING, "failed to unlink: %s: %m\n", filename_path);
 
-		/* remove dump files */
-		unlink(GLOBALS_DUMP_FILE);
+	if (old_cluster.dbarr.dbs)
+	{
+		for (dbnum = 0; dbnum < old_cluster.dbarr.ndbs; dbnum++)
+		{
+			DbInfo	   *old_db = &old_cluster.dbarr.dbs[dbnum];
+
+			snprintf(filename_path, sizeof(filename_path),
+					"%s/dump/" DB_DUMP_FILE_MASK,
+					log_opts.basedir, old_db->db_oid);
+			if (unlink(filename_path))
+				pg_log(PG_WARNING, "failed to unlink: %s: %m\n", filename_path);
+
+			snprintf(filename_path, sizeof(filename_path),
+					"%s/log/" DB_DUMP_LOG_FILE_MASK,
+					log_opts.basedir, old_db->db_oid);
+			if (unlink(filename_path))
+				pg_log(PG_WARNING, "failed to unlink: %s: %m\n", filename_path);
+		}
+	}
 
-		if (old_cluster.dbarr.dbs)
-			for (dbnum = 0; dbnum < old_cluster.dbarr.ndbs; dbnum++)
-			{
-				char		sql_file_name[MAXPGPATH],
-							log_file_name[MAXPGPATH];
-				DbInfo	   *old_db = &old_cluster.dbarr.dbs[dbnum];
+	snprintf(filename_path, sizeof(filename_path),
+			"%s/dump", log_opts.basedir);
+	if (rmdir(filename_path))
+		pg_log(PG_WARNING, "failed to rmdir: %s: %m\n", filename_path);
 
-				snprintf(sql_file_name, sizeof(sql_file_name), DB_DUMP_FILE_MASK, old_db->db_oid);
-				unlink(sql_file_name);
+	snprintf(filename_path, sizeof(filename_path),
+			"%s/log", log_opts.basedir);
+	if (rmdir(filename_path))
+		pg_log(PG_WARNING, "failed to rmdir: %s: %m\n", filename_path);
 
-				snprintf(log_file_name, sizeof(log_file_name), DB_DUMP_LOG_FILE_MASK, old_db->db_oid);
-				unlink(log_file_name);
-			}
-	}
+	if (rmdir(log_opts.basedir))
+		pg_log(PG_WARNING, "failed to rmdir: %s: %m\n", log_opts.basedir);
 }
diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h
index 22169f10021..baa7613753e 100644
--- a/src/bin/pg_upgrade/pg_upgrade.h
+++ b/src/bin/pg_upgrade/pg_upgrade.h
@@ -269,6 +269,7 @@ typedef struct
 	FILE	   *internal;		/* internal log FILE */
 	bool		verbose;		/* true -> be verbose in messages */
 	bool		retain;			/* retain log files on success */
+	char		*basedir;		/* Dir to create logfiles */
 } LogOpts;
 
 
diff --git a/src/bin/pg_upgrade/server.c b/src/bin/pg_upgrade/server.c
index 442a345173c..120437bf0ac 100644
--- a/src/bin/pg_upgrade/server.c
+++ b/src/bin/pg_upgrade/server.c
@@ -238,8 +238,10 @@ start_postmaster(ClusterInfo *cluster, bool report_and_exit_on_error)
 	 * vacuumdb --freeze actually freezes the tuples.
 	 */
 	snprintf(cmd, sizeof(cmd),
-			 "\"%s/pg_ctl\" -w -l \"%s\" -D \"%s\" -o \"-p %d -b%s %s%s\" start",
-			 cluster->bindir, SERVER_LOG_FILE, cluster->pgconfig, cluster->port,
+			 "\"%s/pg_ctl\" -w -l \"%s/log/%s\" -D \"%s\" -o \"-p %d -b%s %s%s\" start",
+			 cluster->bindir,
+			 log_opts.basedir,
+			 SERVER_LOG_FILE, cluster->pgconfig, cluster->port,
 			 (cluster == &new_cluster) ?
 			 " -c synchronous_commit=off -c fsync=off -c full_page_writes=off -c vacuum_defer_cleanup_age=0" : "",
 			 cluster->pgopts ? cluster->pgopts : "", socket_string);
-- 
2.17.0

Re: pg_upgrade should truncate/remove its logs before running

Reply via email to