Hi all,
(Added Georgios in CC.)

When working on the support of LZ4 for pg_receivewal, walmethods.c has
gained an extra parameter for the compression method.  It gets used on
the DirectoryMethod instead of the compression level to decide which
type of compression is used.  One thing that I left out during this
previous work is that the TarMethod also gained knowledge of this
compression method, but we still use the compression level to check if
tars should be compressed or not.

This is wrong on multiple aspects.  First, this is not consistent with
the directory method, making walmethods.c harder to figure out.
Second, this is not extensible if we want to introduce more
compression methods in pg_basebackup, like LZ4.  This reflects on the
options used by pg_receivewal and pg_basebackup, that are not
inconsistent as well.

The attached patch refactors the code of pg_basebackup and the
TarMethod of walmethods.c to use the compression method where it
should, splitting entirely the logic related the compression level.

This is one step toward the introduction of LZ4 in pg_basebackup, but
this refactoring is worth doing on its own, hence a separate thread to
deal with this problem first.  The options of pg_basebackup are
reworked to be consistent with pg_receivewal, as follows:
- --compress ranges now from 1 to 9, instead of 0 to 9.
- --compression-method={none,gzip} is added, the default is none, same
as HEAD.
- --gzip/-z has the same meaning as before, being just a synonym of
--compression-method=gzip with the default compression level of ZLIB
assigned if there is no --compress.

One more thing that I have noticed while hacking this stuff is that we
have no regression tests for gzip with pg_basebackup, so I have added
some that are skipped when not compiling the code with ZLIB.

Opinions?
--
Michael
From 6457eae3577cd0074f58797c4fa420c296d68b39 Mon Sep 17 00:00:00 2001
From: Michael Paquier <mich...@paquier.xyz>
Date: Sat, 18 Dec 2021 20:27:30 +0900
Subject: [PATCH] Refactor options of pg_basebackup for compression level and
 methods

---
 src/bin/pg_basebackup/pg_basebackup.c        | 79 +++++++++++++++-----
 src/bin/pg_basebackup/t/010_pg_basebackup.pl | 47 +++++++++++-
 src/bin/pg_basebackup/walmethods.c           | 47 +++++++-----
 doc/src/sgml/ref/pg_basebackup.sgml          | 22 +++++-
 4 files changed, 155 insertions(+), 40 deletions(-)

diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c
index 1739ac6382..01529aa036 100644
--- a/src/bin/pg_basebackup/pg_basebackup.c
+++ b/src/bin/pg_basebackup/pg_basebackup.c
@@ -113,6 +113,7 @@ static bool showprogress = false;
 static bool estimatesize = true;
 static int	verbose = 0;
 static int	compresslevel = 0;
+static WalCompressionMethod compression_method = COMPRESSION_NONE;
 static IncludeWal includewal = STREAM_WAL;
 static bool fastcheckpoint = false;
 static bool writerecoveryconf = false;
@@ -359,7 +360,9 @@ usage(void)
 	printf(_("  -X, --wal-method=none|fetch|stream\n"
 			 "                         include required WAL files with specified method\n"));
 	printf(_("  -z, --gzip             compress tar output\n"));
-	printf(_("  -Z, --compress=0-9     compress tar output with given compression level\n"));
+	printf(_("  -Z, --compress=1-9     compress tar output with given compression level\n"));
+	printf(_("      --compression-method=METHOD\n"
+			 "                         method to compress data\n"));
 	printf(_("\nGeneral options:\n"));
 	printf(_("  -c, --checkpoint=fast|spread\n"
 			 "                         set fast or spread checkpointing\n"));
@@ -524,7 +527,7 @@ LogStreamerMain(logstreamer_param *param)
 													stream.do_sync);
 	else
 		stream.walmethod = CreateWalTarMethod(param->xlog,
-											  COMPRESSION_NONE, /* ignored */
+											  compression_method,
 											  compresslevel,
 											  stream.do_sync);
 
@@ -1034,19 +1037,22 @@ CreateBackupStreamer(char *archive_name, char *spclocation,
 			archive_file = NULL;
 		}
 
+		if (compression_method == COMPRESSION_NONE)
+			streamer = bbstreamer_plain_writer_new(archive_filename,
+												   archive_file);
 #ifdef HAVE_LIBZ
-		if (compresslevel != 0)
+		else if (compression_method == COMPRESSION_GZIP)
 		{
 			strlcat(archive_filename, ".gz", sizeof(archive_filename));
 			streamer = bbstreamer_gzip_writer_new(archive_filename,
 												  archive_file,
 												  compresslevel);
 		}
-		else
 #endif
-			streamer = bbstreamer_plain_writer_new(archive_filename,
-												   archive_file);
-
+		else
+		{
+			Assert(false);		/* not reachable */
+		}
 
 		/*
 		 * If we need to parse the archive for whatever reason, then we'll
@@ -1765,6 +1771,7 @@ main(int argc, char **argv)
 		{"no-manifest", no_argument, NULL, 5},
 		{"manifest-force-encode", no_argument, NULL, 6},
 		{"manifest-checksums", required_argument, NULL, 7},
+		{"compression-method", required_argument, NULL, 8},
 		{NULL, 0, NULL, 0}
 	};
 	int			c;
@@ -1872,14 +1879,10 @@ main(int argc, char **argv)
 				do_sync = false;
 				break;
 			case 'z':
-#ifdef HAVE_LIBZ
-				compresslevel = Z_DEFAULT_COMPRESSION;
-#else
-				compresslevel = 1;	/* will be rejected below */
-#endif
+				compression_method = COMPRESSION_GZIP;
 				break;
 			case 'Z':
-				if (!option_parse_int(optarg, "-Z/--compress", 0, 9,
+				if (!option_parse_int(optarg, "-Z/--compress", 1, 9,
 									  &compresslevel))
 					exit(1);
 				break;
@@ -1941,6 +1944,18 @@ main(int argc, char **argv)
 			case 7:
 				manifest_checksums = pg_strdup(optarg);
 				break;
+			case 8:
+				if (pg_strcasecmp(optarg, "gzip") == 0)
+					compression_method = COMPRESSION_GZIP;
+				else if (pg_strcasecmp(optarg, "none") == 0)
+					compression_method = COMPRESSION_NONE;
+				else
+				{
+					pg_log_error("invalid value \"%s\" for option %s",
+								 optarg, "--compression-method");
+					exit(1);
+				}
+				break;
 			default:
 
 				/*
@@ -1978,7 +1993,7 @@ main(int argc, char **argv)
 	/*
 	 * Mutually exclusive arguments
 	 */
-	if (format == 'p' && compresslevel != 0)
+	if (format == 'p' && compression_method != COMPRESSION_NONE)
 	{
 		pg_log_error("only tar mode backups can be compressed");
 		fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
@@ -2056,13 +2071,39 @@ main(int argc, char **argv)
 		}
 	}
 
-#ifndef HAVE_LIBZ
-	if (compresslevel != 0)
+	/*
+	 * Compression-related options.
+	 */
+	switch (compression_method)
 	{
-		pg_log_error("this build does not support compression");
-		exit(1);
-	}
+		case COMPRESSION_NONE:
+			if (compresslevel != 0)
+			{
+				pg_log_error("cannot use --compress with --compression-method=%s",
+							 "none");
+				fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
+						progname);
+				exit(1);
+			}
+			break;
+		case COMPRESSION_GZIP:
+#ifdef HAVE_LIBZ
+			if (compresslevel == 0)
+			{
+				pg_log_info("no value specified for --compress, switching to default");
+				compresslevel = Z_DEFAULT_COMPRESSION;
+			}
+#else
+			pg_log_error("this build does not support compression with %s",
+						 "gzip");
+			exit(1);
 #endif
+			break;
+		case COMPRESSION_LZ4:
+			/* option not supported */
+			Assert(false);
+			break;
+	}
 
 	if (showprogress && !estimatesize)
 	{
diff --git a/src/bin/pg_basebackup/t/010_pg_basebackup.pl b/src/bin/pg_basebackup/t/010_pg_basebackup.pl
index e56825382c..3dbda3b5a8 100644
--- a/src/bin/pg_basebackup/t/010_pg_basebackup.pl
+++ b/src/bin/pg_basebackup/t/010_pg_basebackup.pl
@@ -10,7 +10,7 @@ use File::Path qw(rmtree);
 use Fcntl qw(:seek);
 use PostgreSQL::Test::Cluster;
 use PostgreSQL::Test::Utils;
-use Test::More tests => 110;
+use Test::More tests => 115;
 
 program_help_ok('pg_basebackup');
 program_version_ok('pg_basebackup');
@@ -31,6 +31,17 @@ my $pgdata = $node->data_dir;
 $node->command_fails(['pg_basebackup'],
 	'pg_basebackup needs target directory specified');
 
+# Sanity checks for options
+$node->command_fails_like(
+	[
+		'pg_basebackup',   '-D',
+		"$tempdir/backup", '--compression-method',
+		'none',            '--compress',
+		'1'
+	],
+	qr/\Qpg_basebackup: error: cannot use --compress with --compression-method=none/,
+	'failure if --compress specified with --compression-method=none');
+
 # Some Windows ANSI code pages may reject this filename, in which case we
 # quietly proceed without this bit of test coverage.
 if (open my $badchars, '>>', "$tempdir/pgdata/FOO\xe0\xe0\xe0BAR")
@@ -624,3 +635,37 @@ rmtree("$tempdir/backup_corrupt4");
 
 $node->safe_psql('postgres', "DROP TABLE corrupt1;");
 $node->safe_psql('postgres', "DROP TABLE corrupt2;");
+
+note "Testing pg_basebackup with compression methods";
+
+# Check ZLIB compression if available.
+SKIP:
+{
+	skip "postgres was not built with ZLIB support", 5
+	  if (!check_pg_config("#define HAVE_LIBZ 1"));
+
+	$node->command_ok(
+		[
+			'pg_basebackup',        '-D',
+			"$tempdir/backup_gzip", '--compression-method',
+			'gzip', '--compress', '1', '--no-sync', '--format', 't'
+		],
+		'pg_basebackup with --compress and --compression-method=gzip');
+
+	# Verify that the stored files are generated with their expected
+	# names.
+	my @zlib_files = glob "$tempdir/backup_gzip/*.tar.gz";
+	is(scalar(@zlib_files), 2,
+		"two files created with gzip (base.tar.gz and pg_wal.tar.gz)");
+
+	# Check the integrity of the files generated.
+	my $gzip = $ENV{GZIP_PROGRAM};
+	skip "program gzip is not found in your system", 1
+	  if ( !defined $gzip
+		|| $gzip eq ''
+		|| system_log($gzip, '--version') != 0);
+
+	my $gzip_is_valid = system_log($gzip, '--test', @zlib_files);
+	is($gzip_is_valid, 0, "gzip verified the integrity of compressed data");
+	rmtree("$tempdir/backup_gzip");
+}
diff --git a/src/bin/pg_basebackup/walmethods.c b/src/bin/pg_basebackup/walmethods.c
index affdc5055f..127bb5e013 100644
--- a/src/bin/pg_basebackup/walmethods.c
+++ b/src/bin/pg_basebackup/walmethods.c
@@ -749,7 +749,7 @@ tar_write(Walfile f, const void *buf, size_t count)
 	tar_clear_error();
 
 	/* Tarfile will always be positioned at the end */
-	if (!tar_data->compression_level)
+	if (tar_data->compression_method == COMPRESSION_NONE)
 	{
 		errno = 0;
 		r = write(tar_data->fd, buf, count);
@@ -833,7 +833,7 @@ tar_open_for_write(const char *pathname, const char *temp_suffix, size_t pad_to_
 		}
 
 #ifdef HAVE_LIBZ
-		if (tar_data->compression_level)
+		if (tar_data->compression_method == COMPRESSION_GZIP)
 		{
 			tar_data->zp = (z_streamp) pg_malloc(sizeof(z_stream));
 			tar_data->zp->zalloc = Z_NULL;
@@ -884,7 +884,7 @@ tar_open_for_write(const char *pathname, const char *temp_suffix, size_t pad_to_
 	pg_free(tmppath);
 
 #ifdef HAVE_LIBZ
-	if (tar_data->compression_level)
+	if (tar_data->compression_method == COMPRESSION_GZIP)
 	{
 		/* Flush existing data */
 		if (!tar_write_compressed_data(NULL, 0, true))
@@ -909,7 +909,7 @@ tar_open_for_write(const char *pathname, const char *temp_suffix, size_t pad_to_
 	}
 	tar_data->currentfile->currpos = 0;
 
-	if (!tar_data->compression_level)
+	if (tar_data->compression_method == COMPRESSION_NONE)
 	{
 		errno = 0;
 		if (write(tar_data->fd, tar_data->currentfile->header,
@@ -923,7 +923,7 @@ tar_open_for_write(const char *pathname, const char *temp_suffix, size_t pad_to_
 		}
 	}
 #ifdef HAVE_LIBZ
-	else
+	else if (tar_data->compression_method == COMPRESSION_GZIP)
 	{
 		/* Write header through the zlib APIs but with no compression */
 		if (!tar_write_compressed_data(tar_data->currentfile->header,
@@ -938,6 +938,10 @@ tar_open_for_write(const char *pathname, const char *temp_suffix, size_t pad_to_
 		}
 	}
 #endif
+	else
+	{
+		Assert(false);			/* not reachable */
+	}
 
 	tar_data->currentfile->pathname = pg_strdup(pathname);
 
@@ -948,7 +952,7 @@ tar_open_for_write(const char *pathname, const char *temp_suffix, size_t pad_to_
 	if (pad_to_size)
 	{
 		tar_data->currentfile->pad_to_size = pad_to_size;
-		if (!tar_data->compression_level)
+		if (tar_data->compression_method == COMPRESSION_NONE)
 		{
 			/* Uncompressed, so pad now */
 			if (!tar_write_padding_data(tar_data->currentfile, pad_to_size))
@@ -1009,7 +1013,7 @@ tar_sync(Walfile f)
 	 * Always sync the whole tarfile, because that's all we can do. This makes
 	 * no sense on compressed files, so just ignore those.
 	 */
-	if (tar_data->compression_level)
+	if (tar_data->compression_method != COMPRESSION_NONE)
 		return 0;
 
 	r = fsync(tar_data->fd);
@@ -1030,7 +1034,7 @@ tar_close(Walfile f, WalCloseMethod method)
 
 	if (method == CLOSE_UNLINK)
 	{
-		if (tar_data->compression_level)
+		if (tar_data->compression_method != COMPRESSION_NONE)
 		{
 			tar_set_error("unlink not supported with compression");
 			return -1;
@@ -1061,7 +1065,7 @@ tar_close(Walfile f, WalCloseMethod method)
 	 */
 	if (tf->pad_to_size)
 	{
-		if (tar_data->compression_level)
+		if (tar_data->compression_method == COMPRESSION_GZIP)
 		{
 			/*
 			 * A compressed tarfile is padded on close since we cannot know
@@ -1102,7 +1106,7 @@ tar_close(Walfile f, WalCloseMethod method)
 
 
 #ifdef HAVE_LIBZ
-	if (tar_data->compression_level)
+	if (tar_data->compression_method == COMPRESSION_GZIP)
 	{
 		/* Flush the current buffer */
 		if (!tar_write_compressed_data(NULL, 0, true))
@@ -1131,7 +1135,7 @@ tar_close(Walfile f, WalCloseMethod method)
 		tar_data->lasterrno = errno;
 		return -1;
 	}
-	if (!tar_data->compression_level)
+	if (tar_data->compression_method == COMPRESSION_NONE)
 	{
 		errno = 0;
 		if (write(tar_data->fd, tf->header, TAR_BLOCK_SIZE) != TAR_BLOCK_SIZE)
@@ -1142,7 +1146,7 @@ tar_close(Walfile f, WalCloseMethod method)
 		}
 	}
 #ifdef HAVE_LIBZ
-	else
+	else if (tar_data->compression_method == COMPRESSION_GZIP)
 	{
 		/* Turn off compression */
 		if (deflateParams(tar_data->zp, 0, 0) != Z_OK)
@@ -1164,6 +1168,10 @@ tar_close(Walfile f, WalCloseMethod method)
 		}
 	}
 #endif
+	else
+	{
+		Assert(false);			/* not reachable */
+	}
 
 	/* Move file pointer back down to end, so we can write the next file */
 	if (lseek(tar_data->fd, 0, SEEK_END) < 0)
@@ -1212,7 +1220,7 @@ tar_finish(void)
 
 	/* A tarfile always ends with two empty blocks */
 	MemSet(zerobuf, 0, sizeof(zerobuf));
-	if (!tar_data->compression_level)
+	if (tar_data->compression_method == COMPRESSION_NONE)
 	{
 		errno = 0;
 		if (write(tar_data->fd, zerobuf, sizeof(zerobuf)) != sizeof(zerobuf))
@@ -1223,7 +1231,7 @@ tar_finish(void)
 		}
 	}
 #ifdef HAVE_LIBZ
-	else
+	else if (tar_data->compression_method == COMPRESSION_GZIP)
 	{
 		if (!tar_write_compressed_data(zerobuf, sizeof(zerobuf), false))
 			return false;
@@ -1268,6 +1276,10 @@ tar_finish(void)
 		}
 	}
 #endif
+	else
+	{
+		Assert(false);			/* not reachable */
+	}
 
 	/* sync the empty blocks as well, since they're after the last file */
 	if (tar_data->sync)
@@ -1312,7 +1324,8 @@ CreateWalTarMethod(const char *tarbase,
 				   int compression_level, bool sync)
 {
 	WalWriteMethod *method;
-	const char *suffix = (compression_level != 0) ? ".tar.gz" : ".tar";
+	const char *suffix = (compression_method == COMPRESSION_GZIP) ?
+	".tar.gz" : ".tar";
 
 	method = pg_malloc0(sizeof(WalWriteMethod));
 	method->open_for_write = tar_open_for_write;
@@ -1335,7 +1348,7 @@ CreateWalTarMethod(const char *tarbase,
 	tar_data->compression_level = compression_level;
 	tar_data->sync = sync;
 #ifdef HAVE_LIBZ
-	if (compression_level)
+	if (compression_method == COMPRESSION_GZIP)
 		tar_data->zlibOut = (char *) pg_malloc(ZLIB_OUT_SIZE + 1);
 #endif
 
@@ -1347,7 +1360,7 @@ FreeWalTarMethod(void)
 {
 	pg_free(tar_data->tarfilename);
 #ifdef HAVE_LIBZ
-	if (tar_data->compression_level)
+	if (tar_data->compression_method == COMPRESSION_GZIP)
 		pg_free(tar_data->zlibOut);
 #endif
 	pg_free(tar_data);
diff --git a/doc/src/sgml/ref/pg_basebackup.sgml b/doc/src/sgml/ref/pg_basebackup.sgml
index 9e6807b457..9cbec844c8 100644
--- a/doc/src/sgml/ref/pg_basebackup.sgml
+++ b/doc/src/sgml/ref/pg_basebackup.sgml
@@ -372,9 +372,9 @@ PostgreSQL documentation
       <listitem>
        <para>
         Enables gzip compression of tar file output, and specifies the
-        compression level (0 through 9, 0 being no compression and 9 being best
-        compression). Compression is only available when using the tar
-        format, and the suffix <filename>.gz</filename> will
+        compression level (1 through 9, 1 being worst compression and 9
+        being best compression). Compression is only available when using
+        the tar format, and the suffix <filename>.gz</filename> will
         automatically be added to all tar filenames.
        </para>
       </listitem>
@@ -567,6 +567,22 @@ PostgreSQL documentation
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--compression-method=<replaceable class="parameter">method</replaceable></option></term>
+      <listitem>
+       <para>
+        Enables compression of backup data using the specified method.
+        Supported values <literal>gzip</literal> and <literal>none</literal>,
+        the default.
+       </para>
+
+       <para>
+        The suffix <filename>.gz</filename> will automatically be added to
+        all filenames when using <literal>gzip</literal>.
+       </para>
+      </listitem>
+     </varlistentry>
+     
      <varlistentry>
       <term><option>--manifest-force-encode</option></term>
       <listitem>
-- 
2.34.1

Attachment: signature.asc
Description: PGP signature

Reply via email to