On Wed, Mar 01, 2023 at 04:52:49PM +0100, Tomas Vondra wrote: > Thanks. That seems correct to me, but I find it somewhat confusing, > because we now have > > DeflateCompressorInit vs. InitCompressorGzip > > DeflateCompressorEnd vs. EndCompressorGzip > > DeflateCompressorData - The name doesn't really say what it does (would > be better to have a verb in there, I think). > > I wonder if we can make this somehow clearer?
To move things along, I updated Georgios' patch: Rename DeflateCompressorData() to DeflateCompressorCommon(); Rearrange functions to their original order allowing a cleaner diff to the prior code; Change pg_fatal() to an assertion+comment; Update the commit message and fix a few typos; > Also, InitCompressorGzip says this: > > /* > * If the caller has defined a write function, prepare the necessary > * state. Avoid initializing during the first write call, because End > * may be called without ever writing any data. > */ > if (cs->writeF) > DeflateCompressorInit(cs); > > Does it actually make sense to not have writeF defined in some cases? InitCompressor is being called for either reading or writing, either of which could be null: src/bin/pg_dump/pg_backup_custom.c: ctx->cs = AllocateCompressor(AH->compression_spec, src/bin/pg_dump/pg_backup_custom.c- NULL, src/bin/pg_dump/pg_backup_custom.c- _CustomWriteFunc); -- src/bin/pg_dump/pg_backup_custom.c: cs = AllocateCompressor(AH->compression_spec, src/bin/pg_dump/pg_backup_custom.c- _CustomReadFunc, NULL); It's confusing that the comment says "Avoid initializing...". What it really means is "Initialize eagerly...". But that makes more sense in the context of the commit message for this bugfix than in a comment. So I changed that too. + /* If deflation was initialized, finalize it */ + if (cs->private_data) + DeflateCompressorEnd(AH, cs); Maybe it'd be more clear if this used "if (cs->writeF)", like in the init function ? -- Justin
>From 5c027aa86e8591db140093c48a58aafba7a6c28c Mon Sep 17 00:00:00 2001 From: Georgios Kokolatos <gkokola...@pm.me> Date: Wed, 1 Mar 2023 12:42:32 +0000 Subject: [PATCH] pg_dump: fix gzip compression of empty data When creating dumps with the Compressor API, it is possible to only call the Allocate and End compressor functions without ever writing any data. Since e9960732a, the gzip implementation wrongly assumed that the write function would always be called and deferred the initialization of the internal compression system until the first write call. The problem with that approach is that the End call would not finalize the internal compression system if it hadn't been initialized. This commit initializes the internal compression system during the Allocate call, whenever a write function was provided by the caller. Given that decompression does not need to keep track of any state, the compressor's private_data member is now populated only during compression. In passing, rearrange the functions to their original order, to allow usefully comparing with the previous code, like: git diff --diff-algorithm=minimal -w e9960732a~:src/bin/pg_dump/compress_io.c src/bin/pg_dump/compress_gzip.c Also replace an unreachable pg_fatal() with an assert+comment. I (Justin) argued that the new fatal shouldn't have been introduced in a refactoring commit, so this is a compromise. Report and initial patch by Justin Pryzby, test case by Georgios Kokolatos. https://www.postgresql.org/message-id/20230228235834.GC30529%40telsasoft.com --- src/bin/pg_dump/compress_gzip.c | 137 ++++++++++++++++++------------- src/bin/pg_dump/t/002_pg_dump.pl | 23 ++++++ 2 files changed, 101 insertions(+), 59 deletions(-) diff --git a/src/bin/pg_dump/compress_gzip.c b/src/bin/pg_dump/compress_gzip.c index 0af65afeb4e..3c9ac55c266 100644 --- a/src/bin/pg_dump/compress_gzip.c +++ b/src/bin/pg_dump/compress_gzip.c @@ -32,9 +32,75 @@ typedef struct GzipCompressorState size_t outsize; } GzipCompressorState; + /* Private routines that support gzip compressed data I/O */ +static void DeflateCompressorInit(CompressorState *cs); +static void DeflateCompressorEnd(ArchiveHandle *AH, CompressorState *cs); +static void DeflateCompressorCommon(ArchiveHandle *AH, CompressorState *cs, + bool flush); +static void EndCompressorGzip(ArchiveHandle *AH, CompressorState *cs); +static void WriteDataToArchiveGzip(ArchiveHandle *AH, CompressorState *cs, + const void *data, size_t dLen); +static void ReadDataFromArchiveGzip(ArchiveHandle *AH, CompressorState *cs); + static void -DeflateCompressorGzip(ArchiveHandle *AH, CompressorState *cs, bool flush) +DeflateCompressorInit(CompressorState *cs) +{ + GzipCompressorState *gzipcs; + z_streamp zp; + + gzipcs = (GzipCompressorState *) pg_malloc0(sizeof(GzipCompressorState)); + zp = gzipcs->zp = (z_streamp) pg_malloc(sizeof(z_stream)); + zp->zalloc = Z_NULL; + zp->zfree = Z_NULL; + zp->opaque = Z_NULL; + + /* + * outsize is the buffer size we tell zlib it can output to. We actually + * allocate one extra byte because some routines want to append a trailing + * zero byte to the zlib output. + */ + gzipcs->outbuf = pg_malloc(ZLIB_OUT_SIZE + 1); + gzipcs->outsize = ZLIB_OUT_SIZE; + + /* -Z 0 uses the "None" compressor -- not zlib with no compression */ + Assert(cs->compression_spec.level != 0); + + if (deflateInit(zp, cs->compression_spec.level) != Z_OK) + pg_fatal("could not initialize compression library: %s", zp->msg); + + /* Just be paranoid - maybe End is called after Start, with no Write */ + zp->next_out = gzipcs->outbuf; + zp->avail_out = gzipcs->outsize; + + /* Keep track of gzipcs */ + cs->private_data = gzipcs; +} + +static void +DeflateCompressorEnd(ArchiveHandle *AH, CompressorState *cs) +{ + GzipCompressorState *gzipcs = (GzipCompressorState *) cs->private_data; + z_streamp zp; + + zp = gzipcs->zp; + zp->next_in = NULL; + zp->avail_in = 0; + + /* Flush any remaining data from zlib buffer */ + DeflateCompressorCommon(AH, cs, true); + + if (deflateEnd(zp) != Z_OK) + pg_fatal("could not close compression stream: %s", zp->msg); + + pg_free(gzipcs->outbuf); + pg_free(gzipcs->zp); + pg_free(gzipcs); + cs->private_data = NULL; +} + +static void +DeflateCompressorCommon(ArchiveHandle *AH, CompressorState *cs, bool flush) { GzipCompressorState *gzipcs = (GzipCompressorState *) cs->private_data; z_streamp zp = gzipcs->zp; @@ -78,27 +144,9 @@ DeflateCompressorGzip(ArchiveHandle *AH, CompressorState *cs, bool flush) static void EndCompressorGzip(ArchiveHandle *AH, CompressorState *cs) { - GzipCompressorState *gzipcs = (GzipCompressorState *) cs->private_data; - z_streamp zp; - - if (gzipcs->zp) - { - zp = gzipcs->zp; - zp->next_in = NULL; - zp->avail_in = 0; - - /* Flush any remaining data from zlib buffer */ - DeflateCompressorGzip(AH, cs, true); - - if (deflateEnd(zp) != Z_OK) - pg_fatal("could not close compression stream: %s", zp->msg); - - pg_free(gzipcs->outbuf); - pg_free(gzipcs->zp); - } - - pg_free(gzipcs); - cs->private_data = NULL; + /* If deflation was initialized, finalize it */ + if (cs->private_data) + DeflateCompressorEnd(AH, cs); } static void @@ -106,41 +154,10 @@ WriteDataToArchiveGzip(ArchiveHandle *AH, CompressorState *cs, const void *data, size_t dLen) { GzipCompressorState *gzipcs = (GzipCompressorState *) cs->private_data; - z_streamp zp; - - if (!gzipcs->zp) - { - zp = gzipcs->zp = (z_streamp) pg_malloc(sizeof(z_stream)); - zp->zalloc = Z_NULL; - zp->zfree = Z_NULL; - zp->opaque = Z_NULL; - - /* - * outsize is the buffer size we tell zlib it can output to. We - * actually allocate one extra byte because some routines want to - * append a trailing zero byte to the zlib output. - */ - gzipcs->outbuf = pg_malloc(ZLIB_OUT_SIZE + 1); - gzipcs->outsize = ZLIB_OUT_SIZE; - - /* - * A level of zero simply copies the input one block at the time. This - * is probably not what the user wanted when calling this interface. - */ - if (cs->compression_spec.level == 0) - pg_fatal("requested to compress the archive yet no level was specified"); - - if (deflateInit(zp, cs->compression_spec.level) != Z_OK) - pg_fatal("could not initialize compression library: %s", zp->msg); - - /* Just be paranoid - maybe End is called after Start, with no Write */ - zp->next_out = gzipcs->outbuf; - zp->avail_out = gzipcs->outsize; - } gzipcs->zp->next_in = (void *) unconstify(void *, data); gzipcs->zp->avail_in = dLen; - DeflateCompressorGzip(AH, cs, false); + DeflateCompressorCommon(AH, cs, false); } static void @@ -214,17 +231,19 @@ void InitCompressorGzip(CompressorState *cs, const pg_compress_specification compression_spec) { - GzipCompressorState *gzipcs; - cs->readData = ReadDataFromArchiveGzip; cs->writeData = WriteDataToArchiveGzip; cs->end = EndCompressorGzip; cs->compression_spec = compression_spec; - gzipcs = (GzipCompressorState *) pg_malloc0(sizeof(GzipCompressorState)); - - cs->private_data = gzipcs; + /* + * If the caller has defined a write function, prepare the necessary + * state. Note that if the data is empty, End may be called immediately + * after Init, without ever calling Write. + */ + if (cs->writeF) + DeflateCompressorInit(cs); } diff --git a/src/bin/pg_dump/t/002_pg_dump.pl b/src/bin/pg_dump/t/002_pg_dump.pl index 187e4b8d07d..14cd0d2d503 100644 --- a/src/bin/pg_dump/t/002_pg_dump.pl +++ b/src/bin/pg_dump/t/002_pg_dump.pl @@ -1194,6 +1194,29 @@ my %tests = ( }, }, + 'LO create (with no data)' => { + create_sql => + 'SELECT pg_catalog.lo_create(0);', + regexp => qr/^ + \QSELECT pg_catalog.lo_open\E \('\d+',\ \d+\);\n + \QSELECT pg_catalog.lo_close(0);\E + /xm, + like => { + %full_runs, + column_inserts => 1, + data_only => 1, + inserts => 1, + section_data => 1, + test_schema_plus_large_objects => 1, + }, + unlike => { + binary_upgrade => 1, + no_large_objects => 1, + schema_only => 1, + section_pre_data => 1, + }, + }, + 'COMMENT ON DATABASE postgres' => { regexp => qr/^COMMENT ON DATABASE postgres IS .+;/m, -- 2.34.1