My colleague Suraj did testing and noticed the performance impact with the checksums. On further testing, he found that the performance impact is larger specifically with SHA.
Please find below statistics: no of tables without checksum SHA256 checksum % performance overhead with SHA-256 md5 checksum % performance overhead with md5 CRC checksum % performance overhead with CRC 10 (100 MB in each table) real 0m10.957s user 0m0.367s sys 0m2.275s real 0m16.816s user 0m0.210s sys 0m2.067s 53% real 0m11.895s user 0m0.174s sys 0m1.725s 8% real 0m11.136s user 0m0.365s sys 0m2.298s 2% 20 (100 MB in each table) real 0m20.610s user 0m0.484s sys 0m3.198s real 0m31.745s user 0m0.569s sys 0m4.089s 54% real 0m22.717s user 0m0.638s sys 0m4.026s 10% real 0m21.075s user 0m0.538s sys 0m3.417s 2% 50 (100 MB in each table) real 0m49.143s user 0m1.646s sys 0m8.499s real 1m13.683s user 0m1.305s sys 0m10.541s 50% real 0m51.856s user 0m0.932s sys 0m7.702s 6% real 0m49.689s user 0m1.028s sys 0m6.921s 1% 100 (100 MB in each table) real 1m34.308s user 0m2.265s sys 0m14.717s real 2m22.403s user 0m2.613s sys 0m20.776s 51% real 1m41.524s user 0m2.158s sys 0m15.949s 8% real 1m35.045s user 0m2.061s sys 0m16.308s 1% 100 (1 GB in each table) real 17m18.336s user 0m20.222s sys 3m12.960s real 24m45.942s user 0m26.911s sys 3m33.501s 43% real 17m41.670s user 0m26.506s sys 3m18.402s 2% real 17m22.296s user 0m26.811s sys 3m56.653s Sometimes, this test completes within the same time as without checksum. approx. 0.5% Considering the above results, I modified Robert's earlier patch and added a "manifest_with_checksums" option to pg_basebackup. With the new patch, by default, checksums will be disabled and will only be enabled when the "manifest_with_checksums" option is provided. Also re-based the entire patch set. Regards, -- Rushabh Lathia www.EnterpriseDB.com On Tue, Oct 1, 2019 at 5:43 PM Robert Haas <robertmh...@gmail.com> wrote: > On Mon, Sep 30, 2019 at 5:31 AM Jeevan Chalke > <jeevan.cha...@enterprisedb.com> wrote: > > Entry for directory is not added in manifest. So it might be difficult > > at client to get to know about the directories. 
Will it be good to add > > an entry for each directory too? May be like: > > Dir <dirname> <mtime> > > Well, what kind of corruption would this allow us to detect that we > can't detect as things stand? I think the only case is an empty > directory. If it's not empty, we'd have some entries for the files in > that directory, and those files won't be able to exist unless the > directory does. But, how would we end up backing up an empty > directory, anyway? > > I don't really *mind* adding directories into the manifest, but I'm > not sure how much it helps. > > -- > Robert Haas > EnterpriseDB: http://www.enterprisedb.com > The Enterprise PostgreSQL Company > > > -- Rushabh Lathia
From 75bc29edb4d84697901d257ac98514c186c29508 Mon Sep 17 00:00:00 2001 From: Rushabh Lathia <rushabh.lat...@enterprisedb.com> Date: Wed, 13 Nov 2019 15:19:22 +0530 Subject: [PATCH] Reduce code duplication and eliminate weird macro tricks. --- src/bin/pg_basebackup/pg_basebackup.c | 1005 +++++++++++++++++---------------- 1 file changed, 507 insertions(+), 498 deletions(-) diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c index a9d162a..0565212 100644 --- a/src/bin/pg_basebackup/pg_basebackup.c +++ b/src/bin/pg_basebackup/pg_basebackup.c @@ -57,6 +57,40 @@ typedef struct TablespaceList TablespaceListCell *tail; } TablespaceList; +typedef struct WriteTarState +{ + int tablespacenum; + char filename[MAXPGPATH]; + FILE *tarfile; + char tarhdr[512]; + bool basetablespace; + bool in_tarhdr; + bool skip_file; + bool is_recovery_guc_supported; + bool is_postgresql_auto_conf; + bool found_postgresql_auto_conf; + int file_padding_len; + size_t tarhdrsz; + pgoff_t filesz; +#ifdef HAVE_LIBZ + gzFile ztarfile; +#endif +} WriteTarState; + +typedef struct UnpackTarState +{ + int tablespacenum; + char current_path[MAXPGPATH]; + char filename[MAXPGPATH]; + const char *mapped_tblspc_path; + pgoff_t current_len_left; + int current_padding; + FILE *file; +} UnpackTarState; + +typedef void (*WriteDataCallback) (size_t nbytes, char *buf, + void *callback_data); + /* * pg_xlog has been renamed to pg_wal in version 10. This version number * should be compared with PQserverVersion(). 
@@ -142,7 +176,10 @@ static void verify_dir_is_empty_or_create(char *dirname, bool *created, bool *fo static void progress_report(int tablespacenum, const char *filename, bool force); static void ReceiveTarFile(PGconn *conn, PGresult *res, int rownum); +static void ReceiveTarCopyChunk(size_t r, char *copybuf, void *callback_data); static void ReceiveAndUnpackTarFile(PGconn *conn, PGresult *res, int rownum); +static void ReceiveTarAndUnpackCopyChunk(size_t r, char *copybuf, + void *callback_data); static void BaseBackup(void); static bool reached_end_position(XLogRecPtr segendpos, uint32 timeline, @@ -874,42 +911,78 @@ parse_max_rate(char *src) } /* + * Read a stream of COPY data and invoke the provided callback for each + * chunk. + */ +static void +ReceiveCopyData(PGconn *conn, WriteDataCallback callback, + void *callback_data) +{ + PGresult *res; + + /* Get the COPY data stream. */ + res = PQgetResult(conn); + if (PQresultStatus(res) != PGRES_COPY_OUT) + { + pg_log_error("could not get COPY data stream: %s", + PQerrorMessage(conn)); + exit(1); + } + PQclear(res); + + /* Loop over chunks until done. */ + while (1) + { + int r; + char *copybuf; + + r = PQgetCopyData(conn, ©buf, 0); + if (r == -1) + { + /* End of chunk. 
*/ + break; + } + else if (r == -2) + { + pg_log_error("could not read COPY data: %s", + PQerrorMessage(conn)); + exit(1); + } + + (*callback) (r, copybuf, callback_data); + + PQfreemem(copybuf); + } +} + +/* * Write a piece of tar data */ static void -writeTarData( -#ifdef HAVE_LIBZ - gzFile ztarfile, -#endif - FILE *tarfile, char *buf, int r, char *current_file) +writeTarData(WriteTarState * state, char *buf, int r) { #ifdef HAVE_LIBZ - if (ztarfile != NULL) + if (state->ztarfile != NULL) { - if (gzwrite(ztarfile, buf, r) != r) + if (gzwrite(state->ztarfile, buf, r) != r) { pg_log_error("could not write to compressed file \"%s\": %s", - current_file, get_gz_error(ztarfile)); + state->filename, get_gz_error(state->ztarfile)); exit(1); } } else #endif { - if (fwrite(buf, r, 1, tarfile) != 1) + if (fwrite(buf, r, 1, state->tarfile) != 1) { - pg_log_error("could not write to file \"%s\": %m", current_file); + pg_log_error("could not write to file \"%s\": %m", + state->filename); exit(1); } } } -#ifdef HAVE_LIBZ -#define WRITE_TAR_DATA(buf, sz) writeTarData(ztarfile, tarfile, buf, sz, filename) -#else -#define WRITE_TAR_DATA(buf, sz) writeTarData(tarfile, buf, sz, filename) -#endif - /* * Receive a tar format file from the connection to the server, and write * the data from this file directly into a tar file. 
If compression is @@ -923,29 +996,19 @@ writeTarData( static void ReceiveTarFile(PGconn *conn, PGresult *res, int rownum) { - char filename[MAXPGPATH]; - char *copybuf = NULL; - FILE *tarfile = NULL; - char tarhdr[512]; - bool basetablespace = PQgetisnull(res, rownum, 0); - bool in_tarhdr = true; - bool skip_file = false; - bool is_recovery_guc_supported = true; - bool is_postgresql_auto_conf = false; - bool found_postgresql_auto_conf = false; - int file_padding_len = 0; - size_t tarhdrsz = 0; - pgoff_t filesz = 0; + char zerobuf[1024]; + WriteTarState state; -#ifdef HAVE_LIBZ - gzFile ztarfile = NULL; -#endif + memset(&state, 0, sizeof(state)); + state.tablespacenum = rownum; + state.basetablespace = PQgetisnull(res, rownum, 0); + state.in_tarhdr = true; /* recovery.conf is integrated into postgresql.conf in 12 and newer */ - if (PQserverVersion(conn) < MINIMUM_VERSION_FOR_RECOVERY_GUC) - is_recovery_guc_supported = false; + if (PQserverVersion(conn) >= MINIMUM_VERSION_FOR_RECOVERY_GUC) + state.is_recovery_guc_supported = true; - if (basetablespace) + if (state.basetablespace) { /* * Base tablespaces @@ -959,40 +1022,42 @@ ReceiveTarFile(PGconn *conn, PGresult *res, int rownum) #ifdef HAVE_LIBZ if (compresslevel != 0) { - ztarfile = gzdopen(dup(fileno(stdout)), "wb"); - if (gzsetparams(ztarfile, compresslevel, + state.ztarfile = gzdopen(dup(fileno(stdout)), "wb"); + if (gzsetparams(state.ztarfile, compresslevel, Z_DEFAULT_STRATEGY) != Z_OK) { pg_log_error("could not set compression level %d: %s", - compresslevel, get_gz_error(ztarfile)); + compresslevel, get_gz_error(state.ztarfile)); exit(1); } } else #endif - tarfile = stdout; - strcpy(filename, "-"); + state.tarfile = stdout; + strcpy(state.filename, "-"); } else { #ifdef HAVE_LIBZ if (compresslevel != 0) { - snprintf(filename, sizeof(filename), "%s/base.tar.gz", basedir); - ztarfile = gzopen(filename, "wb"); - if (gzsetparams(ztarfile, compresslevel, + snprintf(state.filename, sizeof(state.filename), + 
"%s/base.tar.gz", basedir); + state.ztarfile = gzopen(state.filename, "wb"); + if (gzsetparams(state.ztarfile, compresslevel, Z_DEFAULT_STRATEGY) != Z_OK) { pg_log_error("could not set compression level %d: %s", - compresslevel, get_gz_error(ztarfile)); + compresslevel, get_gz_error(state.ztarfile)); exit(1); } } else #endif { - snprintf(filename, sizeof(filename), "%s/base.tar", basedir); - tarfile = fopen(filename, "wb"); + snprintf(state.filename, sizeof(state.filename), + "%s/base.tar", basedir); + state.tarfile = fopen(state.filename, "wb"); } } } @@ -1004,34 +1069,35 @@ ReceiveTarFile(PGconn *conn, PGresult *res, int rownum) #ifdef HAVE_LIBZ if (compresslevel != 0) { - snprintf(filename, sizeof(filename), "%s/%s.tar.gz", basedir, - PQgetvalue(res, rownum, 0)); - ztarfile = gzopen(filename, "wb"); - if (gzsetparams(ztarfile, compresslevel, + snprintf(state.filename, sizeof(state.filename), + "%s/%s.tar.gz", + basedir, PQgetvalue(res, rownum, 0)); + state.ztarfile = gzopen(state.filename, "wb"); + if (gzsetparams(state.ztarfile, compresslevel, Z_DEFAULT_STRATEGY) != Z_OK) { pg_log_error("could not set compression level %d: %s", - compresslevel, get_gz_error(ztarfile)); + compresslevel, get_gz_error(state.ztarfile)); exit(1); } } else #endif { - snprintf(filename, sizeof(filename), "%s/%s.tar", basedir, - PQgetvalue(res, rownum, 0)); - tarfile = fopen(filename, "wb"); + snprintf(state.filename, sizeof(state.filename), "%s/%s.tar", + basedir, PQgetvalue(res, rownum, 0)); + state.tarfile = fopen(state.filename, "wb"); } } #ifdef HAVE_LIBZ if (compresslevel != 0) { - if (!ztarfile) + if (!state.ztarfile) { /* Compression is in use */ pg_log_error("could not create compressed file \"%s\": %s", - filename, get_gz_error(ztarfile)); + state.filename, get_gz_error(state.ztarfile)); exit(1); } } @@ -1039,314 +1105,292 @@ ReceiveTarFile(PGconn *conn, PGresult *res, int rownum) #endif { /* Either no zlib support, or zlib support but compresslevel = 0 */ - if (!tarfile) + 
if (!state.tarfile) { - pg_log_error("could not create file \"%s\": %m", filename); + pg_log_error("could not create file \"%s\": %m", state.filename); exit(1); } } + ReceiveCopyData(conn, ReceiveTarCopyChunk, &state); + /* - * Get the COPY data stream + * End of copy data. If requested, and this is the base tablespace, write + * configuration file into the tarfile. When done, close the file (but not + * stdout). + * + * Also, write two completely empty blocks at the end of the tar file, as + * required by some tar programs. */ - res = PQgetResult(conn); - if (PQresultStatus(res) != PGRES_COPY_OUT) - { - pg_log_error("could not get COPY data stream: %s", - PQerrorMessage(conn)); - exit(1); - } - while (1) + MemSet(zerobuf, 0, sizeof(zerobuf)); + + if (state.basetablespace && writerecoveryconf) { - int r; + char header[512]; - if (copybuf != NULL) + /* + * If postgresql.auto.conf has not been found in the streamed data, + * add recovery configuration to postgresql.auto.conf if recovery + * parameters are GUCs. If the instance connected to is older than + * 12, create recovery.conf with this data otherwise. + */ + if (!state.found_postgresql_auto_conf || !state.is_recovery_guc_supported) { - PQfreemem(copybuf); - copybuf = NULL; + int padding; + + tarCreateHeader(header, + state.is_recovery_guc_supported ? "postgresql.auto.conf" : "recovery.conf", + NULL, + recoveryconfcontents->len, + pg_file_create_mode, 04000, 02000, + time(NULL)); + + padding = ((recoveryconfcontents->len + 511) & ~511) - recoveryconfcontents->len; + + writeTarData(&state, header, sizeof(header)); + writeTarData(&state, recoveryconfcontents->data, + recoveryconfcontents->len); + if (padding) + writeTarData(&state, zerobuf, padding); } - r = PQgetCopyData(conn, ©buf, 0); - if (r == -1) + /* + * standby.signal is supported only if recovery parameters are GUCs. + */ + if (state.is_recovery_guc_supported) { - /* - * End of chunk. 
If requested, and this is the base tablespace, - * write configuration file into the tarfile. When done, close the - * file (but not stdout). - * - * Also, write two completely empty blocks at the end of the tar - * file, as required by some tar programs. - */ - char zerobuf[1024]; + tarCreateHeader(header, "standby.signal", NULL, + 0, /* zero-length file */ + pg_file_create_mode, 04000, 02000, + time(NULL)); + + writeTarData(&state, header, sizeof(header)); + writeTarData(&state, zerobuf, 511); + } + } - MemSet(zerobuf, 0, sizeof(zerobuf)); + /* 2 * 512 bytes empty data at end of file */ + writeTarData(&state, zerobuf, sizeof(zerobuf)); - if (basetablespace && writerecoveryconf) +#ifdef HAVE_LIBZ + if (state.ztarfile != NULL) + { + if (gzclose(state.ztarfile) != 0) + { + pg_log_error("could not close compressed file \"%s\": %s", + state.filename, get_gz_error(state.ztarfile)); + exit(1); + } + } + else +#endif + { + if (strcmp(basedir, "-") != 0) + { + if (fclose(state.tarfile) != 0) { - char header[512]; + pg_log_error("could not close file \"%s\": %m", + state.filename); + exit(1); + } + } + } - /* - * If postgresql.auto.conf has not been found in the streamed - * data, add recovery configuration to postgresql.auto.conf if - * recovery parameters are GUCs. If the instance connected to - * is older than 12, create recovery.conf with this data - * otherwise. - */ - if (!found_postgresql_auto_conf || !is_recovery_guc_supported) - { - int padding; + progress_report(rownum, state.filename, true); - tarCreateHeader(header, - is_recovery_guc_supported ? "postgresql.auto.conf" : "recovery.conf", - NULL, - recoveryconfcontents->len, - pg_file_create_mode, 04000, 02000, - time(NULL)); + /* + * Do not sync the resulting tar file yet, all files are synced once at + * the end. + */ +} - padding = ((recoveryconfcontents->len + 511) & ~511) - recoveryconfcontents->len; +/* + * Receive one chunk of tar-format data from the server. 
+ */ +static void +ReceiveTarCopyChunk(size_t r, char *copybuf, void *callback_data) +{ + WriteTarState *state = callback_data; - WRITE_TAR_DATA(header, sizeof(header)); - WRITE_TAR_DATA(recoveryconfcontents->data, - recoveryconfcontents->len); - if (padding) - WRITE_TAR_DATA(zerobuf, padding); - } + if (!writerecoveryconf || !state->basetablespace) + { + /* + * When not writing config file, or when not working on the base + * tablespace, we never have to look for an existing configuration + * file in the stream. + */ + writeTarData(state, copybuf, r); + } + else + { + /* + * Look for a config file in the existing tar stream. If it's there, + * we must skip it so we can later overwrite it with our own version + * of the file. + * + * To do this, we have to process the individual files inside the TAR + * stream. The stream consists of a header and zero or more chunks, + * all 512 bytes long. The stream from the server is broken up into + * smaller pieces, so we have to track the size of the files to find + * the next header structure. + */ + int rr = r; + int pos = 0; + while (rr > 0) + { + if (state->in_tarhdr) + { /* - * standby.signal is supported only if recovery parameters are - * GUCs. + * We're currently reading a header structure inside the TAR + * stream, i.e. the file metadata. */ - if (is_recovery_guc_supported) + if (state->tarhdrsz < 512) { - tarCreateHeader(header, "standby.signal", NULL, - 0, /* zero-length file */ - pg_file_create_mode, 04000, 02000, - time(NULL)); + /* + * Copy the header structure into tarhdr in case the + * header is not aligned to 512 bytes or it's not returned + * in whole by the last PQgetCopyData call. + */ + int hdrleft; + int bytes2copy; - WRITE_TAR_DATA(header, sizeof(header)); - WRITE_TAR_DATA(zerobuf, 511); - } - } + hdrleft = 512 - state->tarhdrsz; + bytes2copy = (rr > hdrleft ? 
hdrleft : rr); - /* 2 * 512 bytes empty data at end of file */ - WRITE_TAR_DATA(zerobuf, sizeof(zerobuf)); + memcpy(&state->tarhdr[state->tarhdrsz], copybuf + pos, + bytes2copy); -#ifdef HAVE_LIBZ - if (ztarfile != NULL) - { - if (gzclose(ztarfile) != 0) - { - pg_log_error("could not close compressed file \"%s\": %s", - filename, get_gz_error(ztarfile)); - exit(1); + rr -= bytes2copy; + pos += bytes2copy; + state->tarhdrsz += bytes2copy; } - } - else -#endif - { - if (strcmp(basedir, "-") != 0) - { - if (fclose(tarfile) != 0) - { - pg_log_error("could not close file \"%s\": %m", - filename); - exit(1); - } - } - } - - break; - } - else if (r == -2) - { - pg_log_error("could not read COPY data: %s", - PQerrorMessage(conn)); - exit(1); - } - - if (!writerecoveryconf || !basetablespace) - { - /* - * When not writing config file, or when not working on the base - * tablespace, we never have to look for an existing configuration - * file in the stream. - */ - WRITE_TAR_DATA(copybuf, r); - } - else - { - /* - * Look for a config file in the existing tar stream. If it's - * there, we must skip it so we can later overwrite it with our - * own version of the file. - * - * To do this, we have to process the individual files inside the - * TAR stream. The stream consists of a header and zero or more - * chunks, all 512 bytes long. The stream from the server is - * broken up into smaller pieces, so we have to track the size of - * the files to find the next header structure. - */ - int rr = r; - int pos = 0; - - while (rr > 0) - { - if (in_tarhdr) + else { /* - * We're currently reading a header structure inside the - * TAR stream, i.e. the file metadata. + * We have the complete header structure in tarhdr, look + * at the file metadata: we may want append recovery info + * into postgresql.auto.conf and skip standby.signal file + * if recovery parameters are integrated as GUCs, and + * recovery.conf otherwise. In both cases we must + * calculate tar padding. 
*/ - if (tarhdrsz < 512) + if (state->is_recovery_guc_supported) { - /* - * Copy the header structure into tarhdr in case the - * header is not aligned to 512 bytes or it's not - * returned in whole by the last PQgetCopyData call. - */ - int hdrleft; - int bytes2copy; - - hdrleft = 512 - tarhdrsz; - bytes2copy = (rr > hdrleft ? hdrleft : rr); - - memcpy(&tarhdr[tarhdrsz], copybuf + pos, bytes2copy); - - rr -= bytes2copy; - pos += bytes2copy; - tarhdrsz += bytes2copy; + state->skip_file = + (strcmp(&state->tarhdr[0], "standby.signal") == 0); + state->is_postgresql_auto_conf = + (strcmp(&state->tarhdr[0], "postgresql.auto.conf") == 0); } else - { - /* - * We have the complete header structure in tarhdr, - * look at the file metadata: we may want append - * recovery info into postgresql.auto.conf and skip - * standby.signal file if recovery parameters are - * integrated as GUCs, and recovery.conf otherwise. In - * both cases we must calculate tar padding. - */ - if (is_recovery_guc_supported) - { - skip_file = (strcmp(&tarhdr[0], "standby.signal") == 0); - is_postgresql_auto_conf = (strcmp(&tarhdr[0], "postgresql.auto.conf") == 0); - } - else - skip_file = (strcmp(&tarhdr[0], "recovery.conf") == 0); + state->skip_file = + (strcmp(&state->tarhdr[0], "recovery.conf") == 0); - filesz = read_tar_number(&tarhdr[124], 12); - file_padding_len = ((filesz + 511) & ~511) - filesz; + state->filesz = read_tar_number(&state->tarhdr[124], 12); + state->file_padding_len = + ((state->filesz + 511) & ~511) - state->filesz; - if (is_recovery_guc_supported && - is_postgresql_auto_conf && - writerecoveryconf) - { - /* replace tar header */ - char header[512]; + if (state->is_recovery_guc_supported && + state->is_postgresql_auto_conf && + writerecoveryconf) + { + /* replace tar header */ + char header[512]; - tarCreateHeader(header, "postgresql.auto.conf", NULL, - filesz + recoveryconfcontents->len, - pg_file_create_mode, 04000, 02000, - time(NULL)); + tarCreateHeader(header, 
"postgresql.auto.conf", NULL, + state->filesz + recoveryconfcontents->len, + pg_file_create_mode, 04000, 02000, + time(NULL)); - WRITE_TAR_DATA(header, sizeof(header)); - } - else + writeTarData(state, header, sizeof(header)); + } + else + { + /* copy stream with padding */ + state->filesz += state->file_padding_len; + + if (!state->skip_file) { - /* copy stream with padding */ - filesz += file_padding_len; - - if (!skip_file) - { - /* - * If we're not skipping the file, write the - * tar header unmodified. - */ - WRITE_TAR_DATA(tarhdr, 512); - } + /* + * If we're not skipping the file, write the tar + * header unmodified. + */ + writeTarData(state, state->tarhdr, 512); } - - /* Next part is the file, not the header */ - in_tarhdr = false; } + + /* Next part is the file, not the header */ + state->in_tarhdr = false; } - else + } + else + { + /* + * We're processing a file's contents. + */ + if (state->filesz > 0) { /* - * We're processing a file's contents. + * We still have data to read (and possibly write). */ - if (filesz > 0) - { - /* - * We still have data to read (and possibly write). - */ - int bytes2write; + int bytes2write; - bytes2write = (filesz > rr ? rr : filesz); + bytes2write = (state->filesz > rr ? 
rr : state->filesz); - if (!skip_file) - WRITE_TAR_DATA(copybuf + pos, bytes2write); + if (!state->skip_file) + writeTarData(state, copybuf + pos, bytes2write); - rr -= bytes2write; - pos += bytes2write; - filesz -= bytes2write; - } - else if (is_recovery_guc_supported && - is_postgresql_auto_conf && - writerecoveryconf) - { - /* append recovery config to postgresql.auto.conf */ - int padding; - int tailsize; + rr -= bytes2write; + pos += bytes2write; + state->filesz -= bytes2write; + } + else if (state->is_recovery_guc_supported && + state->is_postgresql_auto_conf && + writerecoveryconf) + { + /* append recovery config to postgresql.auto.conf */ + int padding; + int tailsize; - tailsize = (512 - file_padding_len) + recoveryconfcontents->len; - padding = ((tailsize + 511) & ~511) - tailsize; + tailsize = (512 - state->file_padding_len) + recoveryconfcontents->len; + padding = ((tailsize + 511) & ~511) - tailsize; - WRITE_TAR_DATA(recoveryconfcontents->data, recoveryconfcontents->len); + writeTarData(state, recoveryconfcontents->data, + recoveryconfcontents->len); - if (padding) - { - char zerobuf[512]; + if (padding) + { + char zerobuf[512]; - MemSet(zerobuf, 0, sizeof(zerobuf)); - WRITE_TAR_DATA(zerobuf, padding); - } + MemSet(zerobuf, 0, sizeof(zerobuf)); + writeTarData(state, zerobuf, padding); + } - /* skip original file padding */ - is_postgresql_auto_conf = false; - skip_file = true; - filesz += file_padding_len; + /* skip original file padding */ + state->is_postgresql_auto_conf = false; + state->skip_file = true; + state->filesz += state->file_padding_len; - found_postgresql_auto_conf = true; - } - else - { - /* - * No more data in the current file, the next piece of - * data (if any) will be a new file header structure. 
- */ - in_tarhdr = true; - skip_file = false; - is_postgresql_auto_conf = false; - tarhdrsz = 0; - filesz = 0; - } + state->found_postgresql_auto_conf = true; + } + else + { + /* + * No more data in the current file, the next piece of + * data (if any) will be a new file header structure. + */ + state->in_tarhdr = true; + state->skip_file = false; + state->is_postgresql_auto_conf = false; + state->tarhdrsz = 0; + state->filesz = 0; } } } - totaldone += r; - progress_report(rownum, filename, false); - } /* while (1) */ - progress_report(rownum, filename, true); - - if (copybuf != NULL) - PQfreemem(copybuf); - - /* - * Do not sync the resulting tar file yet, all files are synced once at - * the end. - */ + } + totaldone += r; + progress_report(state->tablespacenum, state->filename, false); } @@ -1384,254 +1428,219 @@ get_tablespace_mapping(const char *dir) static void ReceiveAndUnpackTarFile(PGconn *conn, PGresult *res, int rownum) { - char current_path[MAXPGPATH]; - char filename[MAXPGPATH]; - const char *mapped_tblspc_path; - pgoff_t current_len_left = 0; - int current_padding = 0; + UnpackTarState state; bool basetablespace; - char *copybuf = NULL; - FILE *file = NULL; + + memset(&state, 0, sizeof(state)); + state.tablespacenum = rownum; basetablespace = PQgetisnull(res, rownum, 0); if (basetablespace) - strlcpy(current_path, basedir, sizeof(current_path)); + strlcpy(state.current_path, basedir, sizeof(state.current_path)); else - strlcpy(current_path, + strlcpy(state.current_path, get_tablespace_mapping(PQgetvalue(res, rownum, 1)), - sizeof(current_path)); + sizeof(state.current_path)); - /* - * Get the COPY data - */ - res = PQgetResult(conn); - if (PQresultStatus(res) != PGRES_COPY_OUT) + ReceiveCopyData(conn, ReceiveTarAndUnpackCopyChunk, &state); + + + if (state.file) + fclose(state.file); + + progress_report(rownum, state.filename, true); + + if (state.file != NULL) { - pg_log_error("could not get COPY data stream: %s", - PQerrorMessage(conn)); + 
pg_log_error("COPY stream ended before last file was finished"); exit(1); } - while (1) - { - int r; + if (basetablespace && writerecoveryconf) + WriteRecoveryConfig(conn, basedir, recoveryconfcontents); - if (copybuf != NULL) - { - PQfreemem(copybuf); - copybuf = NULL; - } + /* + * No data is synced here, everything is done for all tablespaces at the + * end. + */ +} - r = PQgetCopyData(conn, ©buf, 0); +static void +ReceiveTarAndUnpackCopyChunk(size_t r, char *copybuf, void *callback_data) +{ + UnpackTarState *state = callback_data; - if (r == -1) - { - /* - * End of chunk - */ - if (file) - fclose(file); + if (state->file == NULL) + { +#ifndef WIN32 + int filemode; +#endif - break; - } - else if (r == -2) + /* + * No current file, so this must be the header for a new file + */ + if (r != 512) { - pg_log_error("could not read COPY data: %s", - PQerrorMessage(conn)); + pg_log_error("invalid tar block header size: %zu", r); exit(1); } + totaldone += 512; - if (file == NULL) - { -#ifndef WIN32 - int filemode; -#endif - - /* - * No current file, so this must be the header for a new file - */ - if (r != 512) - { - pg_log_error("invalid tar block header size: %d", r); - exit(1); - } - totaldone += 512; - - current_len_left = read_tar_number(©buf[124], 12); + state->current_len_left = read_tar_number(©buf[124], 12); #ifndef WIN32 - /* Set permissions on the file */ - filemode = read_tar_number(©buf[100], 8); + /* Set permissions on the file */ + filemode = read_tar_number(©buf[100], 8); #endif - /* - * All files are padded up to 512 bytes - */ - current_padding = - ((current_len_left + 511) & ~511) - current_len_left; + /* + * All files are padded up to 512 bytes + */ + state->current_padding = + ((state->current_len_left + 511) & ~511) - state->current_len_left; + /* + * First part of header is zero terminated filename + */ + snprintf(state->filename, sizeof(state->filename), + "%s/%s", state->current_path, copybuf); + if (state->filename[strlen(state->filename) - 1] == 
'/') + { /* - * First part of header is zero terminated filename + * Ends in a slash means directory or symlink to directory */ - snprintf(filename, sizeof(filename), "%s/%s", current_path, - copybuf); - if (filename[strlen(filename) - 1] == '/') + if (copybuf[156] == '5') { /* - * Ends in a slash means directory or symlink to directory + * Directory. Remove trailing slash first. */ - if (copybuf[156] == '5') + state->filename[strlen(state->filename) - 1] = '\0'; + if (mkdir(state->filename, pg_dir_create_mode) != 0) { /* - * Directory + * When streaming WAL, pg_wal (or pg_xlog for pre-9.6 + * clusters) will have been created by the wal receiver + * process. Also, when the WAL directory location was + * specified, pg_wal (or pg_xlog) has already been created + * as a symbolic link before starting the actual backup. + * So just ignore creation failures on related + * directories. */ - filename[strlen(filename) - 1] = '\0'; /* Remove trailing slash */ - if (mkdir(filename, pg_dir_create_mode) != 0) + if (!((pg_str_endswith(state->filename, "/pg_wal") || + pg_str_endswith(state->filename, "/pg_xlog") || + pg_str_endswith(state->filename, "/archive_status")) && + errno == EEXIST)) { - /* - * When streaming WAL, pg_wal (or pg_xlog for pre-9.6 - * clusters) will have been created by the wal - * receiver process. Also, when the WAL directory - * location was specified, pg_wal (or pg_xlog) has - * already been created as a symbolic link before - * starting the actual backup. So just ignore creation - * failures on related directories. 
- */ - if (!((pg_str_endswith(filename, "/pg_wal") || - pg_str_endswith(filename, "/pg_xlog") || - pg_str_endswith(filename, "/archive_status")) && - errno == EEXIST)) - { - pg_log_error("could not create directory \"%s\": %m", - filename); - exit(1); - } + pg_log_error("could not create directory \"%s\": %m", + state->filename); + exit(1); } + } #ifndef WIN32 - if (chmod(filename, (mode_t) filemode)) - pg_log_error("could not set permissions on directory \"%s\": %m", - filename); + if (chmod(state->filename, (mode_t) filemode)) + pg_log_error("could not set permissions on directory \"%s\": %m", + state->filename); #endif - } - else if (copybuf[156] == '2') - { - /* - * Symbolic link - * - * It's most likely a link in pg_tblspc directory, to the - * location of a tablespace. Apply any tablespace mapping - * given on the command line (--tablespace-mapping). (We - * blindly apply the mapping without checking that the - * link really is inside pg_tblspc. We don't expect there - * to be other symlinks in a data directory, but if there - * are, you can call it an undocumented feature that you - * can map them too.) - */ - filename[strlen(filename) - 1] = '\0'; /* Remove trailing slash */ + } + else if (copybuf[156] == '2') + { + /* + * Symbolic link + * + * It's most likely a link in pg_tblspc directory, to the + * location of a tablespace. Apply any tablespace mapping + * given on the command line (--tablespace-mapping). (We + * blindly apply the mapping without checking that the link + * really is inside pg_tblspc. We don't expect there to be + * other symlinks in a data directory, but if there are, you + * can call it an undocumented feature that you can map them + * too.) 
+ */ + state->filename[strlen(state->filename) - 1] = '\0'; /* Remove trailing slash */ - mapped_tblspc_path = get_tablespace_mapping(©buf[157]); - if (symlink(mapped_tblspc_path, filename) != 0) - { - pg_log_error("could not create symbolic link from \"%s\" to \"%s\": %m", - filename, mapped_tblspc_path); - exit(1); - } - } - else + state->mapped_tblspc_path = + get_tablespace_mapping(©buf[157]); + if (symlink(state->mapped_tblspc_path, state->filename) != 0) { - pg_log_error("unrecognized link indicator \"%c\"", - copybuf[156]); + pg_log_error("could not create symbolic link from \"%s\" to \"%s\": %m", + state->filename, state->mapped_tblspc_path); exit(1); } - continue; /* directory or link handled */ } - - /* - * regular file - */ - file = fopen(filename, "wb"); - if (!file) + else { - pg_log_error("could not create file \"%s\": %m", filename); + pg_log_error("unrecognized link indicator \"%c\"", + copybuf[156]); exit(1); } + return; /* directory or link handled */ + } + + /* + * regular file + */ + state->file = fopen(state->filename, "wb"); + if (!state->file) + { + pg_log_error("could not create file \"%s\": %m", state->filename); + exit(1); + } #ifndef WIN32 - if (chmod(filename, (mode_t) filemode)) - pg_log_error("could not set permissions on file \"%s\": %m", - filename); + if (chmod(state->filename, (mode_t) filemode)) + pg_log_error("could not set permissions on file \"%s\": %m", + state->filename); #endif - if (current_len_left == 0) - { - /* - * Done with this file, next one will be a new tar header - */ - fclose(file); - file = NULL; - continue; - } - } /* new file */ - else + if (state->current_len_left == 0) { /* - * Continuing blocks in existing file + * Done with this file, next one will be a new tar header */ - if (current_len_left == 0 && r == current_padding) - { - /* - * Received the padding block for this file, ignore it and - * close the file, then move on to the next tar header. 
- */ - fclose(file); - file = NULL; - totaldone += r; - continue; - } - - if (fwrite(copybuf, r, 1, file) != 1) - { - pg_log_error("could not write to file \"%s\": %m", filename); - exit(1); - } - totaldone += r; - progress_report(rownum, filename, false); - - current_len_left -= r; - if (current_len_left == 0 && current_padding == 0) - { - /* - * Received the last block, and there is no padding to be - * expected. Close the file and move on to the next tar - * header. - */ - fclose(file); - file = NULL; - continue; - } - } /* continuing data in existing file */ - } /* loop over all data blocks */ - progress_report(rownum, filename, true); - - if (file != NULL) + fclose(state->file); + state->file = NULL; + return; + } + } /* new file */ + else { - pg_log_error("COPY stream ended before last file was finished"); - exit(1); - } - - if (copybuf != NULL) - PQfreemem(copybuf); + /* + * Continuing blocks in existing file + */ + if (state->current_len_left == 0 && r == state->current_padding) + { + /* + * Received the padding block for this file, ignore it and close + * the file, then move on to the next tar header. + */ + fclose(state->file); + state->file = NULL; + totaldone += r; + return; + } - if (basetablespace && writerecoveryconf) - WriteRecoveryConfig(conn, basedir, recoveryconfcontents); + if (fwrite(copybuf, r, 1, state->file) != 1) + { + pg_log_error("could not write to file \"%s\": %m", state->filename); + exit(1); + } + totaldone += r; + progress_report(state->tablespacenum, state->filename, false); - /* - * No data is synced here, everything is done for all tablespaces at the - * end. - */ + state->current_len_left -= r; + if (state->current_len_left == 0 && state->current_padding == 0) + { + /* + * Received the last block, and there is no padding to be + * expected. Close the file and move on to the next tar header. 
+ */ + fclose(state->file); + state->file = NULL; + return; + } + } /* continuing data in existing file */ } - static void BaseBackup(void) { -- 1.8.3.1
From c0bb04d4626da331813603627c17c5f30075713a Mon Sep 17 00:00:00 2001 From: Rushabh Lathia <rushabh.lat...@enterprisedb.com> Date: Tue, 19 Nov 2019 14:42:39 +0530 Subject: [PATCH] Make checksum optional in pg_basebackup. --- src/backend/replication/basebackup.c | 50 ++++++++++++++++++++++++++-------- src/backend/replication/repl_gram.y | 6 ++++ src/backend/replication/repl_scanner.l | 1 + src/bin/pg_basebackup/pg_basebackup.c | 12 ++++++-- 4 files changed, 55 insertions(+), 14 deletions(-) diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c index 273f837..a98d949 100644 --- a/src/backend/replication/basebackup.c +++ b/src/backend/replication/basebackup.c @@ -132,6 +132,9 @@ static long long int total_checksum_failures; /* Do not verify checksums. */ static bool noverify_checksums = false; +/* Add file entry into the manifest with checksums. */ +static bool manifest_with_checksums = false; + /* * The contents of these directories are removed or recreated during server * start so they are not included in backups. 
The directories themselves are @@ -674,6 +677,7 @@ parse_basebackup_options(List *options, basebackup_options *opt) bool o_maxrate = false; bool o_tablespace_map = false; bool o_noverify_checksums = false; + bool o_manifest_with_checksums = false; MemSet(opt, 0, sizeof(*opt)); foreach(lopt, options) @@ -762,6 +766,16 @@ parse_basebackup_options(List *options, basebackup_options *opt) noverify_checksums = true; o_noverify_checksums = true; } + else if (strcmp(defel->defname, "manifest_with_checksums") == 0) + { + if (o_manifest_with_checksums) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("duplicate option \"%s\"", defel->defname))); + manifest_with_checksums = true; + o_manifest_with_checksums = true; + } + else elog(ERROR, "option \"%s\" not recognized", defel->defname); @@ -930,15 +944,18 @@ AddFileToManifest(StringInfo manifest, const char *tsoid, pg_strftime(timebuf, sizeof(timebuf), "%Y-%m-%d %H:%M:%S %Z", tm); /* Convert checksum to hexadecimal. */ - shatextlen = - hex_encode((char *) shabuf, PG_SHA256_DIGEST_LENGTH, shatextbuf); - Assert(shatextlen + 1 == sizeof(shatextbuf)); - shatextbuf[shatextlen] = '\0'; + if (manifest_with_checksums) + { + shatextlen = + hex_encode((char *) shabuf, PG_SHA256_DIGEST_LENGTH, shatextbuf); + Assert(shatextlen + 1 == sizeof(shatextbuf)); + shatextbuf[shatextlen] = '\0'; + } /* Add to manifest. */ appendStringInfo(manifest, "File\t%s\t%zu\t%s\t%s\n", escaped_filename == NULL ? filename : escaped_filename, - size, timebuf, shatextbuf); + size, timebuf, manifest_with_checksums ? shatextbuf : "-"); /* Avoid leaking memory. 
*/ if (escaped_filename != NULL) @@ -1100,7 +1117,8 @@ sendFileWithContent(const char *filename, const char *content, pg_sha256_ctx sha256_ctx; uint8 shabuf[PG_SHA256_DIGEST_LENGTH]; - pg_sha256_init(&sha256_ctx); + if (manifest_with_checksums) + pg_sha256_init(&sha256_ctx); len = strlen(content); @@ -1134,8 +1152,12 @@ sendFileWithContent(const char *filename, const char *content, pq_putmessage('d', buf, pad); } - pg_sha256_update(&sha256_ctx, (uint8 *) content, len); - pg_sha256_final(&sha256_ctx, shabuf); + if (manifest_with_checksums) + { + pg_sha256_update(&sha256_ctx, (uint8 *) content, len); + pg_sha256_final(&sha256_ctx, shabuf); + } + AddFileToManifest(manifest, NULL, filename, len, statbuf.st_mtime, shabuf); } @@ -1571,7 +1593,8 @@ sendFile(const char *readfilename, const char *tarfilename, pg_sha256_ctx sha256_ctx; uint8 shabuf[PG_SHA256_DIGEST_LENGTH]; - pg_sha256_init(&sha256_ctx); + if (manifest_with_checksums) + pg_sha256_init(&sha256_ctx); fp = AllocateFile(readfilename, "rb"); if (fp == NULL) @@ -1742,7 +1765,8 @@ sendFile(const char *readfilename, const char *tarfilename, (errmsg("base backup could not send data, aborting backup"))); /* Also feed it to the checksum machinery. 
*/ - pg_sha256_update(&sha256_ctx, (uint8 *) buf, cnt); + if (manifest_with_checksums) + pg_sha256_update(&sha256_ctx, (uint8 *) buf, cnt); len += cnt; throttle(cnt); @@ -1768,7 +1792,8 @@ sendFile(const char *readfilename, const char *tarfilename, { cnt = Min(sizeof(buf), statbuf->st_size - len); pq_putmessage('d', buf, cnt); - pg_sha256_update(&sha256_ctx, (uint8 *) buf, cnt); + if (manifest_with_checksums) + pg_sha256_update(&sha256_ctx, (uint8 *) buf, cnt); len += cnt; throttle(cnt); } @@ -1801,7 +1826,8 @@ sendFile(const char *readfilename, const char *tarfilename, total_checksum_failures += checksum_failures; - pg_sha256_final(&sha256_ctx, shabuf); + if (manifest_with_checksums) + pg_sha256_final(&sha256_ctx, shabuf); AddFileToManifest(manifest, tsoid, tarfilename, statbuf->st_size, statbuf->st_mtime, shabuf); diff --git a/src/backend/replication/repl_gram.y b/src/backend/replication/repl_gram.y index c4e11cc..542a3f7 100644 --- a/src/backend/replication/repl_gram.y +++ b/src/backend/replication/repl_gram.y @@ -87,6 +87,7 @@ static SQLCmd *make_sqlcmd(void); %token K_EXPORT_SNAPSHOT %token K_NOEXPORT_SNAPSHOT %token K_USE_SNAPSHOT +%token K_MANIFEST_WITH_CHECKSUMS %type <node> command %type <node> base_backup start_replication start_logical_replication @@ -214,6 +215,11 @@ base_backup_opt: $$ = makeDefElem("noverify_checksums", (Node *)makeInteger(true), -1); } + | K_MANIFEST_WITH_CHECKSUMS + { + $$ = makeDefElem("manifest_with_checksums", + (Node *)makeInteger(true), -1); + } ; create_replication_slot: diff --git a/src/backend/replication/repl_scanner.l b/src/backend/replication/repl_scanner.l index 380faeb..4f92bc1 100644 --- a/src/backend/replication/repl_scanner.l +++ b/src/backend/replication/repl_scanner.l @@ -107,6 +107,7 @@ EXPORT_SNAPSHOT { return K_EXPORT_SNAPSHOT; } NOEXPORT_SNAPSHOT { return K_NOEXPORT_SNAPSHOT; } USE_SNAPSHOT { return K_USE_SNAPSHOT; } WAIT { return K_WAIT; } +MANIFEST_WITH_CHECKSUMS { return K_MANIFEST_WITH_CHECKSUMS; } "," { 
return ','; } ";" { return ';'; } diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c index 508e4a6..f4f8ffe 100644 --- a/src/bin/pg_basebackup/pg_basebackup.c +++ b/src/bin/pg_basebackup/pg_basebackup.c @@ -141,6 +141,7 @@ static bool temp_replication_slot = true; static bool create_slot = false; static bool no_slot = false; static bool verify_checksums = true; +static bool manifest_with_checksums = false; static bool success = false; static bool made_new_pgdata = false; @@ -398,6 +399,8 @@ usage(void) printf(_(" --no-slot prevent creation of temporary replication slot\n")); printf(_(" --no-verify-checksums\n" " do not verify checksums\n")); + printf(_(" --manifest-with-checksums\n" + " do calculate checksums for manifest files\n")); printf(_(" -?, --help show this help, then exit\n")); printf(_("\nConnection options:\n")); printf(_(" -d, --dbname=CONNSTR connection string\n")); @@ -1821,7 +1824,7 @@ BaseBackup(void) } basebkp = - psprintf("BASE_BACKUP LABEL '%s' %s %s %s %s %s %s %s", + psprintf("BASE_BACKUP LABEL '%s' %s %s %s %s %s %s %s %s", escaped_label, showprogress ? "PROGRESS" : "", includewal == FETCH_WAL ? "WAL" : "", @@ -1829,7 +1832,8 @@ BaseBackup(void) includewal == NO_WAL ? "" : "NOWAIT", maxrate_clause ? maxrate_clause : "", format == 't' ? "TABLESPACE_MAP" : "", - verify_checksums ? "" : "NOVERIFY_CHECKSUMS"); + verify_checksums ? "" : "NOVERIFY_CHECKSUMS", + manifest_with_checksums ? "MANIFEST_WITH_CHECKSUMS" : ""); if (PQsendQuery(conn, basebkp) == 0) { @@ -2162,6 +2166,7 @@ main(int argc, char **argv) {"waldir", required_argument, NULL, 1}, {"no-slot", no_argument, NULL, 2}, {"no-verify-checksums", no_argument, NULL, 3}, + {"manifest-with-checksums", no_argument, NULL, 4}, {NULL, 0, NULL, 0} }; int c; @@ -2330,6 +2335,9 @@ main(int argc, char **argv) case 3: verify_checksums = false; break; + case 4: + manifest_with_checksums = true; + break; default: /* -- 1.8.3.1
From d7f34e6863b4d533d0fc4ada8f020308dfae107d Mon Sep 17 00:00:00 2001 From: Rushabh Lathia <rushabh.lat...@enterprisedb.com> Date: Sat, 16 Nov 2019 11:57:04 +0530 Subject: [PATCH] Fix warnings. --- src/backend/replication/basebackup.c | 7 +++++-- src/bin/pg_basebackup/pg_basebackup.c | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c index 9812f2a..273f837 100644 --- a/src/backend/replication/basebackup.c +++ b/src/backend/replication/basebackup.c @@ -902,6 +902,7 @@ AddFileToManifest(StringInfo manifest, const char *tsoid, static char timebuf[128]; static char shatextbuf[PG_SHA256_DIGEST_LENGTH * 2 + 1]; int shatextlen; + struct pg_tm *tm; /* * If this file is part of a tablespace, the filename passed to this @@ -923,8 +924,10 @@ AddFileToManifest(StringInfo manifest, const char *tsoid, * and since time zone definitions can change, possibly causing confusion, * use GMT always. */ - pg_strftime(timebuf, sizeof(timebuf), "%Y-%m-%d %H:%M:%S %Z", - pg_gmtime(&mtime)); + tm = pg_gmtime(&mtime); + if (tm == NULL) + elog(ERROR, "could not convert epoch to timestamp: %m"); + pg_strftime(timebuf, sizeof(timebuf), "%Y-%m-%d %H:%M:%S %Z", tm); /* Convert checksum to hexadecimal. */ shatextlen = diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c index c56246f..508e4a6 100644 --- a/src/bin/pg_basebackup/pg_basebackup.c +++ b/src/bin/pg_basebackup/pg_basebackup.c @@ -1194,7 +1194,7 @@ ReceiveTarFile(PGconn *conn, PGresult *res, int rownum) initPQExpBuffer(&buf); ReceiveBackupManifestInMemory(conn, &buf); - if (PQExpBufferBroken(&buf)) + if (PQExpBufferDataBroken(buf)) { pg_log_error("out of memory"); exit(1); -- 1.8.3.1
From 5c3baefd9c9caca252f949528667d2427b037579 Mon Sep 17 00:00:00 2001 From: Rushabh Lathia <rushabh.lat...@enterprisedb.com> Date: Wed, 13 Nov 2019 15:25:54 +0530 Subject: [PATCH] POC of backup manifest with file names, sizes, timestamps, checksums. --- src/backend/access/transam/xlog.c | 3 +- src/backend/replication/basebackup.c | 238 ++++++++++++++++++++++++++++++---- src/bin/pg_basebackup/pg_basebackup.c | 116 ++++++++++++++++- src/include/replication/basebackup.h | 4 +- 4 files changed, 334 insertions(+), 27 deletions(-) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 3b766e6..b7f1fe5 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -10495,7 +10495,8 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p, ti->oid = pstrdup(de->d_name); ti->path = pstrdup(buflinkpath.data); ti->rpath = relpath ? pstrdup(relpath) : NULL; - ti->size = infotbssize ? sendTablespace(fullpath, true) : -1; + ti->size = infotbssize ? 
+ sendTablespace(fullpath, ti->oid, true, NULL) : -1; if (tablespaces) *tablespaces = lappend(*tablespaces, ti); diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c index 1fa4551..9812f2a 100644 --- a/src/backend/replication/basebackup.c +++ b/src/backend/replication/basebackup.c @@ -19,6 +19,7 @@ #include "access/xlog_internal.h" /* for pg_start/stop_backup */ #include "catalog/pg_type.h" #include "common/file_perm.h" +#include "common/sha2.h" #include "lib/stringinfo.h" #include "libpq/libpq.h" #include "libpq/pqformat.h" @@ -55,16 +56,25 @@ typedef struct static int64 sendDir(const char *path, int basepathlen, bool sizeonly, - List *tablespaces, bool sendtblspclinks); + List *tablespaces, bool sendtblspclinks, + StringInfo manifest, const char *tsoid); static bool sendFile(const char *readfilename, const char *tarfilename, - struct stat *statbuf, bool missing_ok, Oid dboid); -static void sendFileWithContent(const char *filename, const char *content); + struct stat *statbuf, bool missing_ok, Oid dboid, + StringInfo manifest, const char *tsoid); +static void sendFileWithContent(const char *filename, const char *content, + StringInfo manifest); static int64 _tarWriteHeader(const char *filename, const char *linktarget, struct stat *statbuf, bool sizeonly); static int64 _tarWriteDir(const char *pathbuf, int basepathlen, struct stat *statbuf, bool sizeonly); static void send_int8_string(StringInfoData *buf, int64 intval); static void SendBackupHeader(List *tablespaces); +static void InitializeManifest(StringInfo manifest); +static void AddFileToManifest(StringInfo manifest, const char *tsoid, + const char *filename, size_t size, time_t mtime, + uint8 *shabuf); +static void SendBackupManifest(StringInfo manifest); +static char *escape_field_for_manifest(const char *s); static void base_backup_cleanup(int code, Datum arg); static void perform_base_backup(basebackup_options *opt); static void parse_basebackup_options(List *options, 
basebackup_options *opt); @@ -241,6 +251,7 @@ perform_base_backup(basebackup_options *opt) TimeLineID endtli; StringInfo labelfile; StringInfo tblspc_map_file = NULL; + StringInfo manifest; int datadirpathlen; List *tablespaces = NIL; @@ -250,6 +261,8 @@ perform_base_backup(basebackup_options *opt) labelfile = makeStringInfo(); tblspc_map_file = makeStringInfo(); + manifest = makeStringInfo(); + InitializeManifest(manifest); total_checksum_failures = 0; @@ -286,7 +299,10 @@ perform_base_backup(basebackup_options *opt) /* Add a node for the base directory at the end */ ti = palloc0(sizeof(tablespaceinfo)); - ti->size = opt->progress ? sendDir(".", 1, true, tablespaces, true) : -1; + if (opt->progress) + ti->size = sendDir(".", 1, true, tablespaces, true, NULL, NULL); + else + ti->size = -1; tablespaces = lappend(tablespaces, ti); /* Send tablespace header */ @@ -333,7 +349,8 @@ perform_base_backup(basebackup_options *opt) struct stat statbuf; /* In the main tar, include the backup_label first... */ - sendFileWithContent(BACKUP_LABEL_FILE, labelfile->data); + sendFileWithContent(BACKUP_LABEL_FILE, labelfile->data, + manifest); /* * Send tablespace_map file if required and then the bulk of @@ -341,11 +358,12 @@ perform_base_backup(basebackup_options *opt) */ if (tblspc_map_file && opt->sendtblspcmapfile) { - sendFileWithContent(TABLESPACE_MAP, tblspc_map_file->data); - sendDir(".", 1, false, tablespaces, false); + sendFileWithContent(TABLESPACE_MAP, tblspc_map_file->data, + manifest); + sendDir(".", 1, false, tablespaces, false, manifest, NULL); } else - sendDir(".", 1, false, tablespaces, true); + sendDir(".", 1, false, tablespaces, true, manifest, NULL); /* ... and pg_control after everything else. 
*/ if (lstat(XLOG_CONTROL_FILE, &statbuf) != 0) @@ -353,10 +371,11 @@ perform_base_backup(basebackup_options *opt) (errcode_for_file_access(), errmsg("could not stat file \"%s\": %m", XLOG_CONTROL_FILE))); - sendFile(XLOG_CONTROL_FILE, XLOG_CONTROL_FILE, &statbuf, false, InvalidOid); + sendFile(XLOG_CONTROL_FILE, XLOG_CONTROL_FILE, &statbuf, + false, InvalidOid, manifest, NULL); } else - sendTablespace(ti->path, false); + sendTablespace(ti->path, ti->oid, false, manifest); /* * If we're including WAL, and this is the main data directory we @@ -575,7 +594,7 @@ perform_base_backup(basebackup_options *opt) * complete segment. */ StatusFilePath(pathbuf, walFileName, ".done"); - sendFileWithContent(pathbuf, ""); + sendFileWithContent(pathbuf, "", manifest); } /* @@ -598,16 +617,20 @@ perform_base_backup(basebackup_options *opt) (errcode_for_file_access(), errmsg("could not stat file \"%s\": %m", pathbuf))); - sendFile(pathbuf, pathbuf, &statbuf, false, InvalidOid); + sendFile(pathbuf, pathbuf, &statbuf, false, InvalidOid, manifest, + NULL); /* unconditionally mark file as archived */ StatusFilePath(pathbuf, fname, ".done"); - sendFileWithContent(pathbuf, ""); + sendFileWithContent(pathbuf, "", manifest); } /* Send CopyDone message for the last tar file */ pq_putemptymessage('c'); } + + SendBackupManifest(manifest); + SendXlogRecPtrResult(endptr, endtli); if (total_checksum_failures) @@ -860,6 +883,151 @@ SendBackupHeader(List *tablespaces) pq_puttextmessage('C', "SELECT"); } +static void +InitializeManifest(StringInfo manifest) +{ + appendStringInfoString(manifest, "PostgreSQL-Backup-Manifest-Version 1\n"); +} + +/* + * Add an entry to the backup manifest for a file. 
+ */ +static void +AddFileToManifest(StringInfo manifest, const char *tsoid, + const char *filename, size_t size, time_t mtime, + uint8 *shabuf) +{ + char pathbuf[MAXPGPATH]; + char *escaped_filename; + static char timebuf[128]; + static char shatextbuf[PG_SHA256_DIGEST_LENGTH * 2 + 1]; + int shatextlen; + + /* + * If this file is part of a tablespace, the filename passed to this + * function will be relative to the tar file that contains it. We want + * the pathname relative to the data directory (ignoring the intermediate + * symlink traversal). + */ + if (tsoid != NULL) + { + snprintf(pathbuf, sizeof(pathbuf), "pg_tblspc/%s/%s", tsoid, filename); + filename = pathbuf; + } + + /* Escape filename, if necessary. */ + escaped_filename = escape_field_for_manifest(filename); + + /* + * Convert time to a string. Since it's not clear what time zone to use + * and since time zone definitions can change, possibly causing confusion, + * use GMT always. + */ + pg_strftime(timebuf, sizeof(timebuf), "%Y-%m-%d %H:%M:%S %Z", + pg_gmtime(&mtime)); + + /* Convert checksum to hexadecimal. */ + shatextlen = + hex_encode((char *) shabuf, PG_SHA256_DIGEST_LENGTH, shatextbuf); + Assert(shatextlen + 1 == sizeof(shatextbuf)); + shatextbuf[shatextlen] = '\0'; + + /* Add to manifest. */ + appendStringInfo(manifest, "File\t%s\t%zu\t%s\t%s\n", + escaped_filename == NULL ? filename : escaped_filename, + size, timebuf, shatextbuf); + + /* Avoid leaking memory. */ + if (escaped_filename != NULL) + pfree(escaped_filename); +} + +/* + * Finalize the backup manifest, and send it to the client. + */ +static void +SendBackupManifest(StringInfo manifest) +{ + pg_sha256_ctx sha256_ctx; + uint8 shabuf[PG_SHA256_DIGEST_LENGTH]; + StringInfoData protobuf; + int shastringlen; + + /* Checksum the manifest. 
*/ + pg_sha256_init(&sha256_ctx); + pg_sha256_update(&sha256_ctx, (uint8 *) manifest->data, manifest->len); + pg_sha256_final(&sha256_ctx, shabuf); + appendStringInfoString(manifest, "Manifest-Checksum\t"); + shastringlen = PG_SHA256_DIGEST_LENGTH * 2; + enlargeStringInfo(manifest, shastringlen); + shastringlen = hex_encode((char *) shabuf, PG_SHA256_DIGEST_LENGTH, + manifest->data + manifest->len); + Assert(shastringlen == PG_SHA256_DIGEST_LENGTH * 2); + manifest->len += shastringlen; + appendStringInfoChar(manifest, '\n'); + + /* Send CopyOutResponse message */ + pq_beginmessage(&protobuf, 'H'); + pq_sendbyte(&protobuf, 0); /* overall format */ + pq_sendint16(&protobuf, 0); /* natts */ + pq_endmessage(&protobuf); + + /* Send CopyData message */ + pq_putmessage('d', manifest->data, manifest->len); + + /* And finally CopyDone message */ + pq_putemptymessage('c'); +} + +/* + * Escape a field for inclusion in a manifest. + * + * We use the following escaping rule: If a field contains \t, \r, or \n, + * the field must be surrounded by double-quotes, and any internal double + * quotes must be doubled. Otherwise, no escaping is required. + * + * The return value is a new palloc'd string with escaping added, or NULL + * if no escaping is required. 
+ */ +static char * +escape_field_for_manifest(const char *s) +{ + bool escaping_required = false; + int escaped_length = 2; + const char *t; + char *result; + char *r; + + for (t = s; *t != '\0'; ++t) + { + if (*t == '\t' || *t == '\r' || *t == '\n') + escaping_required = true; + if (*t == '"') + ++escaped_length; + ++escaped_length; + } + + if (!escaping_required) + return NULL; + + result = palloc(escaped_length + 1); + result[0] = '"'; + result[escaped_length - 1] = '"'; + result[escaped_length] = '\0'; + r = result + 1; + + for (t = s; *t != '\0'; ++t) + { + *(r++) = *t; + if (*t == '"') + *(r++) = *t; + } + + Assert(r == &result[escaped_length - 1]); + + return result; +} + /* * Send a single resultset containing just a single * XLogRecPtr record (in text format) @@ -920,11 +1088,16 @@ SendXlogRecPtrResult(XLogRecPtr ptr, TimeLineID tli) * Inject a file with given name and content in the output tar stream. */ static void -sendFileWithContent(const char *filename, const char *content) +sendFileWithContent(const char *filename, const char *content, + StringInfo manifest) { struct stat statbuf; int pad, len; + pg_sha256_ctx sha256_ctx; + uint8 shabuf[PG_SHA256_DIGEST_LENGTH]; + + pg_sha256_init(&sha256_ctx); len = strlen(content); @@ -957,6 +1130,11 @@ sendFileWithContent(const char *filename, const char *content) MemSet(buf, 0, pad); pq_putmessage('d', buf, pad); } + + pg_sha256_update(&sha256_ctx, (uint8 *) content, len); + pg_sha256_final(&sha256_ctx, shabuf); + AddFileToManifest(manifest, NULL, filename, len, statbuf.st_mtime, + shabuf); } /* @@ -967,7 +1145,7 @@ sendFileWithContent(const char *filename, const char *content) * Only used to send auxiliary tablespaces, not PGDATA. 
*/ int64 -sendTablespace(char *path, bool sizeonly) +sendTablespace(char *path, char *oid, bool sizeonly, StringInfo manifest) { int64 size; char pathbuf[MAXPGPATH]; @@ -1000,7 +1178,7 @@ sendTablespace(char *path, bool sizeonly) sizeonly); /* Send all the files in the tablespace version directory */ - size += sendDir(pathbuf, strlen(path), sizeonly, NIL, true); + size += sendDir(pathbuf, strlen(path), sizeonly, NIL, true, manifest, oid); return size; } @@ -1019,7 +1197,7 @@ sendTablespace(char *path, bool sizeonly) */ static int64 sendDir(const char *path, int basepathlen, bool sizeonly, List *tablespaces, - bool sendtblspclinks) + bool sendtblspclinks, StringInfo manifest, const char *tsoid) { DIR *dir; struct dirent *de; @@ -1295,7 +1473,8 @@ sendDir(const char *path, int basepathlen, bool sizeonly, List *tablespaces, skip_this_dir = true; if (!skip_this_dir) - size += sendDir(pathbuf, basepathlen, sizeonly, tablespaces, sendtblspclinks); + size += sendDir(pathbuf, basepathlen, sizeonly, tablespaces, + sendtblspclinks, manifest, tsoid); } else if (S_ISREG(statbuf.st_mode)) { @@ -1303,7 +1482,8 @@ sendDir(const char *path, int basepathlen, bool sizeonly, List *tablespaces, if (!sizeonly) sent = sendFile(pathbuf, pathbuf + basepathlen + 1, &statbuf, - true, isDbDir ? pg_atoi(lastDir + 1, sizeof(Oid), 0) : InvalidOid); + true, isDbDir ? pg_atoi(lastDir + 1, sizeof(Oid), 0) : InvalidOid, + manifest, tsoid); if (sent || sizeonly) { @@ -1366,8 +1546,9 @@ is_checksummed_file(const char *fullpath, const char *filename) * and the file did not exist. 
*/ static bool -sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf, - bool missing_ok, Oid dboid) +sendFile(const char *readfilename, const char *tarfilename, + struct stat *statbuf, bool missing_ok, Oid dboid, + StringInfo manifest, const char *tsoid) { FILE *fp; BlockNumber blkno = 0; @@ -1384,6 +1565,10 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf int segmentno = 0; char *segmentpath; bool verify_checksum = false; + pg_sha256_ctx sha256_ctx; + uint8 shabuf[PG_SHA256_DIGEST_LENGTH]; + + pg_sha256_init(&sha256_ctx); fp = AllocateFile(readfilename, "rb"); if (fp == NULL) @@ -1553,6 +1738,9 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf ereport(ERROR, (errmsg("base backup could not send data, aborting backup"))); + /* Also feed it to the checksum machinery. */ + pg_sha256_update(&sha256_ctx, (uint8 *) buf, cnt); + len += cnt; throttle(cnt); @@ -1577,6 +1765,7 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf { cnt = Min(sizeof(buf), statbuf->st_size - len); pq_putmessage('d', buf, cnt); + pg_sha256_update(&sha256_ctx, (uint8 *) buf, cnt); len += cnt; throttle(cnt); } @@ -1584,7 +1773,8 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf /* * Pad to 512 byte boundary, per tar format requirements. (This small - * piece of data is probably not worth throttling.) + * piece of data is probably not worth throttling, and is not checksummed + * because it's not actually part of the file.) 
*/ pad = ((len + 511) & ~511) - len; if (pad > 0) @@ -1608,6 +1798,10 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf total_checksum_failures += checksum_failures; + pg_sha256_final(&sha256_ctx, shabuf); + AddFileToManifest(manifest, tsoid, tarfilename, statbuf->st_size, + statbuf->st_mtime, shabuf); + return true; } diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c index 0565212..c56246f 100644 --- a/src/bin/pg_basebackup/pg_basebackup.c +++ b/src/bin/pg_basebackup/pg_basebackup.c @@ -88,6 +88,12 @@ typedef struct UnpackTarState FILE *file; } UnpackTarState; +typedef struct WriteManifestState +{ + char filename[MAXPGPATH]; + FILE *file; +} WriteManifestState; + typedef void (*WriteDataCallback) (size_t nbytes, char *buf, void *callback_data); @@ -180,6 +186,12 @@ static void ReceiveTarCopyChunk(size_t r, char *copybuf, void *callback_data); static void ReceiveAndUnpackTarFile(PGconn *conn, PGresult *res, int rownum); static void ReceiveTarAndUnpackCopyChunk(size_t r, char *copybuf, void *callback_data); +static void ReceiveBackupManifest(PGconn *conn); +static void ReceiveBackupManifestChunk(size_t r, char *copybuf, + void *callback_data); +static void ReceiveBackupManifestInMemory(PGconn *conn, PQExpBuffer buf); +static void ReceiveBackupManifestInMemoryChunk(size_t r, char *copybuf, + void *callback_data); static void BaseBackup(void); static bool reached_end_position(XLogRecPtr segendpos, uint32 timeline, @@ -924,8 +936,8 @@ ReceiveCopyData(PGconn *conn, WriteDataCallback callback, res = PQgetResult(conn); if (PQresultStatus(res) != PGRES_COPY_OUT) { - pg_log_error("could not get COPY data stream: %s", - PQerrorMessage(conn)); + pg_log_error("could not get COPY data stream: %s [%s]", + PQerrorMessage(conn), PQresStatus(PQresultStatus(res))); exit(1); } PQclear(res); @@ -1170,6 +1182,31 @@ ReceiveTarFile(PGconn *conn, PGresult *res, int rownum) } } + /* + * Normally, we emit the backup 
manifest as a separate file, but when + * we're writing a tarfile to stdout, we don't have that option, so + * include it in the one tarfile we've got. + */ + if (strcmp(basedir, "-") == 0) + { + char header[512]; + PQExpBufferData buf; + + initPQExpBuffer(&buf); + ReceiveBackupManifestInMemory(conn, &buf); + if (PQExpBufferBroken(&buf)) + { + pg_log_error("out of memory"); + exit(1); + } + tarCreateHeader(header, "backup_manifest", NULL, buf.len, + pg_file_create_mode, 04000, 02000, + time(NULL)); + writeTarData(&state, header, sizeof(header)); + writeTarData(&state, buf.data, buf.len); + termPQExpBuffer(&buf); + } + /* 2 * 512 bytes empty data at end of file */ writeTarData(&state, zerobuf, sizeof(zerobuf)); @@ -1417,6 +1454,64 @@ get_tablespace_mapping(const char *dir) /* + * Receive the backup manifest file and write it out to a file. + */ +static void +ReceiveBackupManifest(PGconn *conn) +{ + WriteManifestState state; + + snprintf(state.filename, sizeof(state.filename), + "%s/backup_manifest", basedir); + state.file = fopen(state.filename, "wb"); + if (state.file == NULL) + { + pg_log_error("could not create file \"%s\": %m", state.filename); + exit(1); + } + + ReceiveCopyData(conn, ReceiveBackupManifestChunk, &state); + + fclose(state.file); +} + +/* + * Receive one chunk of the backup manifest file and write it out to a file. + */ +static void +ReceiveBackupManifestChunk(size_t r, char *copybuf, void *callback_data) +{ + WriteManifestState *state = callback_data; + + if (fwrite(copybuf, r, 1, state->file) != 1) + { + pg_log_error("could not write to file \"%s\": %m", state->filename); + exit(1); + } +} + +/* + * Receive the backup manifest file and write it out to a file. + */ +static void +ReceiveBackupManifestInMemory(PGconn *conn, PQExpBuffer buf) +{ + ReceiveCopyData(conn, ReceiveBackupManifestInMemoryChunk, buf); +} + +/* + * Receive one chunk of the backup manifest file and write it out to a file. 
+ */ +static void +ReceiveBackupManifestInMemoryChunk(size_t r, char *copybuf, + void *callback_data) +{ + PQExpBuffer buf = callback_data; + + appendPQExpBuffer(buf, copybuf, r); +} + +/* * Receive a tar format stream from the connection to the server, and unpack * the contents of it into a directory. Only files, directories and * symlinks are supported, no other kinds of special files. @@ -1658,6 +1753,7 @@ BaseBackup(void) maxServerMajor; int serverVersion, serverMajor; + int writing_to_stdout; Assert(conn != NULL); @@ -1821,7 +1917,8 @@ BaseBackup(void) /* * When writing to stdout, require a single tablespace */ - if (format == 't' && strcmp(basedir, "-") == 0 && PQntuples(res) > 1) + writing_to_stdout = format == 't' && strcmp(basedir, "-") == 0; + if (writing_to_stdout && PQntuples(res) > 1) { pg_log_error("can only write single tablespace to stdout, database has %d", PQntuples(res)); @@ -1850,6 +1947,19 @@ BaseBackup(void) ReceiveAndUnpackTarFile(conn, res, i); } /* Loop over all tablespaces */ + /* + * Now receive backup manifest, if appropriate. + * + * If we're writing a tarfile to stdout, ReceiveTarFile will have already + * processed the backup manifest and included it in the output tarfile. + * Such a configuration doesn't allow for writing multiple files. + * + * If we're talking to an older server, it won't send a backup manifest, + * so don't try to receive one. 
+ */ + if (!writing_to_stdout && serverMajor >= 1300) + ReceiveBackupManifest(conn); + if (showprogress) { progress_report(PQntuples(res), NULL, true); diff --git a/src/include/replication/basebackup.h b/src/include/replication/basebackup.h index 503a5b9..8fe0136 100644 --- a/src/include/replication/basebackup.h +++ b/src/include/replication/basebackup.h @@ -12,6 +12,7 @@ #ifndef _BASEBACKUP_H #define _BASEBACKUP_H +#include "lib/stringinfo.h" #include "nodes/replnodes.h" /* @@ -31,6 +32,7 @@ typedef struct extern void SendBaseBackup(BaseBackupCmd *cmd); -extern int64 sendTablespace(char *path, bool sizeonly); +extern int64 sendTablespace(char *path, char *oid, bool sizeonly, + StringInfo manifest); #endif /* _BASEBACKUP_H */ -- 1.8.3.1