On Wed, Mar 01, 2023 at 04:52:49PM +0100, Tomas Vondra wrote:
> Thanks. That seems correct to me, but I find it somewhat confusing,
> because we now have
> 
>  DeflateCompressorInit vs. InitCompressorGzip
> 
>  DeflateCompressorEnd vs. EndCompressorGzip
> 
>  DeflateCompressorData - The name doesn't really say what it does (would
>                          be better to have a verb in there, I think).
> 
> I wonder if we can make this somehow clearer?

To move things along, I updated Georgios' patch:

Rename DeflateCompressorData() to DeflateCompressorCommon();
Rearrange functions to their original order allowing a cleaner diff to the 
prior code;
Change pg_fatal() to an assertion+comment;
Update the commit message and fix a few typos;

> Also, InitCompressorGzip says this:
> 
>    /*
>     * If the caller has defined a write function, prepare the necessary
>     * state. Avoid initializing during the first write call, because End
>     * may be called without ever writing any data.
>     */
>     if (cs->writeF)
>         DeflateCompressorInit(cs);
>
> Does it actually make sense to not have writeF defined in some cases?

InitCompressor is being called for either reading or writing, either of
which could be null:

src/bin/pg_dump/pg_backup_custom.c:     ctx->cs = 
AllocateCompressor(AH->compression_spec,
src/bin/pg_dump/pg_backup_custom.c-                                             
                 NULL,
src/bin/pg_dump/pg_backup_custom.c-                                             
                 _CustomWriteFunc);
--
src/bin/pg_dump/pg_backup_custom.c:     cs = 
AllocateCompressor(AH->compression_spec,
src/bin/pg_dump/pg_backup_custom.c-                                             
        _CustomReadFunc, NULL);

It's confusing that the comment says "Avoid initializing...".  What it
really means is "Initialize eagerly...".  But that makes more sense in
the context of the commit message for this bugfix than in a comment.  So
I changed that too.

+       /* If deflation was initialized, finalize it */                         
                                                             
+       if (cs->private_data)                                                   
                                                             
+               DeflateCompressorEnd(AH, cs);                                   
                                                             

Maybe it'd be more clear if this used "if (cs->writeF)", like in the
init function ?

-- 
Justin
>From 5c027aa86e8591db140093c48a58aafba7a6c28c Mon Sep 17 00:00:00 2001
From: Georgios Kokolatos <gkokola...@pm.me>
Date: Wed, 1 Mar 2023 12:42:32 +0000
Subject: [PATCH] pg_dump: fix gzip compression of empty data

When creating dumps with the Compressor API, it is possible to only call
the Allocate and End compressor functions without ever writing any data.
Since e9960732a, the gzip implementation wrongly assumed that the write
function would always be called and deferred the initialization of the
internal compression system until the first write call.

The problem with that approach is that the End call would not finalize
the internal compression system if it hadn't been initialized.

This commit initializes the internal compression system during the
Allocate call, whenever a write function was provided by the caller.
Given that decompression does not need to keep track of any state, the
compressor's private_data member is now populated only during
compression.

In passing, rearrange the functions to their original order, to allow
usefully comparing with the previous code, like:
git diff --diff-algorithm=minimal -w e9960732a~:src/bin/pg_dump/compress_io.c src/bin/pg_dump/compress_gzip.c

Also replace an unreachable pg_fatal() with an assert+comment.  I
(Justin) argued that the new fatal shouldn't have been introduced in a
refactoring commit, so this is a compromise.

Report and initial patch by Justin Pryzby, test case by Georgios
Kokolatos.

https://www.postgresql.org/message-id/20230228235834.GC30529%40telsasoft.com
---
 src/bin/pg_dump/compress_gzip.c  | 137 ++++++++++++++++++-------------
 src/bin/pg_dump/t/002_pg_dump.pl |  23 ++++++
 2 files changed, 101 insertions(+), 59 deletions(-)

diff --git a/src/bin/pg_dump/compress_gzip.c b/src/bin/pg_dump/compress_gzip.c
index 0af65afeb4e..3c9ac55c266 100644
--- a/src/bin/pg_dump/compress_gzip.c
+++ b/src/bin/pg_dump/compress_gzip.c
@@ -32,9 +32,75 @@ typedef struct GzipCompressorState
 	size_t		outsize;
 } GzipCompressorState;
 
+
 /* Private routines that support gzip compressed data I/O */
+static void DeflateCompressorInit(CompressorState *cs);
+static void DeflateCompressorEnd(ArchiveHandle *AH, CompressorState *cs);
+static void DeflateCompressorCommon(ArchiveHandle *AH, CompressorState *cs,
+									bool flush);
+static void EndCompressorGzip(ArchiveHandle *AH, CompressorState *cs);
+static void WriteDataToArchiveGzip(ArchiveHandle *AH, CompressorState *cs,
+								   const void *data, size_t dLen);
+static void ReadDataFromArchiveGzip(ArchiveHandle *AH, CompressorState *cs);
+
 static void
-DeflateCompressorGzip(ArchiveHandle *AH, CompressorState *cs, bool flush)
+DeflateCompressorInit(CompressorState *cs)
+{
+	GzipCompressorState *gzipcs;
+	z_streamp	zp;
+
+	gzipcs = (GzipCompressorState *) pg_malloc0(sizeof(GzipCompressorState));
+	zp = gzipcs->zp = (z_streamp) pg_malloc(sizeof(z_stream));
+	zp->zalloc = Z_NULL;
+	zp->zfree = Z_NULL;
+	zp->opaque = Z_NULL;
+
+	/*
+	 * outsize is the buffer size we tell zlib it can output to.  We actually
+	 * allocate one extra byte because some routines want to append a trailing
+	 * zero byte to the zlib output.
+	 */
+	gzipcs->outbuf = pg_malloc(ZLIB_OUT_SIZE + 1);
+	gzipcs->outsize = ZLIB_OUT_SIZE;
+
+	/* -Z 0 uses the "None" compressor -- not zlib with no compression */
+	Assert(cs->compression_spec.level != 0);
+
+	if (deflateInit(zp, cs->compression_spec.level) != Z_OK)
+		pg_fatal("could not initialize compression library: %s", zp->msg);
+
+	/* Just be paranoid - maybe End is called after Start, with no Write */
+	zp->next_out = gzipcs->outbuf;
+	zp->avail_out = gzipcs->outsize;
+
+	/* Keep track of gzipcs */
+	cs->private_data = gzipcs;
+}
+
+static void
+DeflateCompressorEnd(ArchiveHandle *AH, CompressorState *cs)
+{
+	GzipCompressorState *gzipcs = (GzipCompressorState *) cs->private_data;
+	z_streamp	zp;
+
+	zp = gzipcs->zp;
+	zp->next_in = NULL;
+	zp->avail_in = 0;
+
+	/* Flush any remaining data from zlib buffer */
+	DeflateCompressorCommon(AH, cs, true);
+
+	if (deflateEnd(zp) != Z_OK)
+		pg_fatal("could not close compression stream: %s", zp->msg);
+
+	pg_free(gzipcs->outbuf);
+	pg_free(gzipcs->zp);
+	pg_free(gzipcs);
+	cs->private_data = NULL;
+}
+
+static void
+DeflateCompressorCommon(ArchiveHandle *AH, CompressorState *cs, bool flush)
 {
 	GzipCompressorState *gzipcs = (GzipCompressorState *) cs->private_data;
 	z_streamp	zp = gzipcs->zp;
@@ -78,27 +144,9 @@ DeflateCompressorGzip(ArchiveHandle *AH, CompressorState *cs, bool flush)
 static void
 EndCompressorGzip(ArchiveHandle *AH, CompressorState *cs)
 {
-	GzipCompressorState *gzipcs = (GzipCompressorState *) cs->private_data;
-	z_streamp	zp;
-
-	if (gzipcs->zp)
-	{
-		zp = gzipcs->zp;
-		zp->next_in = NULL;
-		zp->avail_in = 0;
-
-		/* Flush any remaining data from zlib buffer */
-		DeflateCompressorGzip(AH, cs, true);
-
-		if (deflateEnd(zp) != Z_OK)
-			pg_fatal("could not close compression stream: %s", zp->msg);
-
-		pg_free(gzipcs->outbuf);
-		pg_free(gzipcs->zp);
-	}
-
-	pg_free(gzipcs);
-	cs->private_data = NULL;
+	/* If deflation was initialized, finalize it */
+	if (cs->private_data)
+		DeflateCompressorEnd(AH, cs);
 }
 
 static void
@@ -106,41 +154,10 @@ WriteDataToArchiveGzip(ArchiveHandle *AH, CompressorState *cs,
 					   const void *data, size_t dLen)
 {
 	GzipCompressorState *gzipcs = (GzipCompressorState *) cs->private_data;
-	z_streamp	zp;
-
-	if (!gzipcs->zp)
-	{
-		zp = gzipcs->zp = (z_streamp) pg_malloc(sizeof(z_stream));
-		zp->zalloc = Z_NULL;
-		zp->zfree = Z_NULL;
-		zp->opaque = Z_NULL;
-
-		/*
-		 * outsize is the buffer size we tell zlib it can output to.  We
-		 * actually allocate one extra byte because some routines want to
-		 * append a trailing zero byte to the zlib output.
-		 */
-		gzipcs->outbuf = pg_malloc(ZLIB_OUT_SIZE + 1);
-		gzipcs->outsize = ZLIB_OUT_SIZE;
-
-		/*
-		 * A level of zero simply copies the input one block at the time. This
-		 * is probably not what the user wanted when calling this interface.
-		 */
-		if (cs->compression_spec.level == 0)
-			pg_fatal("requested to compress the archive yet no level was specified");
-
-		if (deflateInit(zp, cs->compression_spec.level) != Z_OK)
-			pg_fatal("could not initialize compression library: %s", zp->msg);
-
-		/* Just be paranoid - maybe End is called after Start, with no Write */
-		zp->next_out = gzipcs->outbuf;
-		zp->avail_out = gzipcs->outsize;
-	}
 
 	gzipcs->zp->next_in = (void *) unconstify(void *, data);
 	gzipcs->zp->avail_in = dLen;
-	DeflateCompressorGzip(AH, cs, false);
+	DeflateCompressorCommon(AH, cs, false);
 }
 
 static void
@@ -214,17 +231,19 @@ void
 InitCompressorGzip(CompressorState *cs,
 				   const pg_compress_specification compression_spec)
 {
-	GzipCompressorState *gzipcs;
-
 	cs->readData = ReadDataFromArchiveGzip;
 	cs->writeData = WriteDataToArchiveGzip;
 	cs->end = EndCompressorGzip;
 
 	cs->compression_spec = compression_spec;
 
-	gzipcs = (GzipCompressorState *) pg_malloc0(sizeof(GzipCompressorState));
-
-	cs->private_data = gzipcs;
+	/*
+	 * If the caller has defined a write function, prepare the necessary
+	 * state.  Note that if the data is empty, End may be called immediately
+	 * after Init, without ever calling Write.
+	 */
+	if (cs->writeF)
+		DeflateCompressorInit(cs);
 }
 
 
diff --git a/src/bin/pg_dump/t/002_pg_dump.pl b/src/bin/pg_dump/t/002_pg_dump.pl
index 187e4b8d07d..14cd0d2d503 100644
--- a/src/bin/pg_dump/t/002_pg_dump.pl
+++ b/src/bin/pg_dump/t/002_pg_dump.pl
@@ -1194,6 +1194,29 @@ my %tests = (
 		},
 	},
 
+	'LO create (with no data)' => {
+		create_sql =>
+		  'SELECT pg_catalog.lo_create(0);',
+		regexp => qr/^
+			\QSELECT pg_catalog.lo_open\E \('\d+',\ \d+\);\n
+			\QSELECT pg_catalog.lo_close(0);\E
+			/xm,
+		like   => {
+			%full_runs,
+			column_inserts         => 1,
+			data_only              => 1,
+			inserts                => 1,
+			section_data           => 1,
+			test_schema_plus_large_objects => 1,
+		},
+		unlike => {
+			binary_upgrade         => 1,
+			no_large_objects       => 1,
+			schema_only            => 1,
+			section_pre_data       => 1,
+		},
+	},
+
 	'COMMENT ON DATABASE postgres' => {
 		regexp => qr/^COMMENT ON DATABASE postgres IS .+;/m,
 
-- 
2.34.1

Reply via email to