My colleague Suraj ran some tests and noticed the performance impact of the
checksums.  On further testing, he found that the impact is particularly
large with SHA-256.

Please find the statistics below:

Timings (real / user / sys) and % performance overhead relative to the
no-checksum run:

10 tables (100 MB each)
    without checksum:  real 0m10.957s,  user 0m0.367s,  sys 0m2.275s
    SHA-256:           real 0m16.816s,  user 0m0.210s,  sys 0m2.067s   (53% overhead)
    MD5:               real 0m11.895s,  user 0m0.174s,  sys 0m1.725s   (8% overhead)
    CRC:               real 0m11.136s,  user 0m0.365s,  sys 0m2.298s   (2% overhead)

20 tables (100 MB each)
    without checksum:  real 0m20.610s,  user 0m0.484s,  sys 0m3.198s
    SHA-256:           real 0m31.745s,  user 0m0.569s,  sys 0m4.089s   (54% overhead)
    MD5:               real 0m22.717s,  user 0m0.638s,  sys 0m4.026s   (10% overhead)
    CRC:               real 0m21.075s,  user 0m0.538s,  sys 0m3.417s   (2% overhead)

50 tables (100 MB each)
    without checksum:  real 0m49.143s,  user 0m1.646s,  sys 0m8.499s
    SHA-256:           real 1m13.683s,  user 0m1.305s,  sys 0m10.541s  (50% overhead)
    MD5:               real 0m51.856s,  user 0m0.932s,  sys 0m7.702s   (6% overhead)
    CRC:               real 0m49.689s,  user 0m1.028s,  sys 0m6.921s   (1% overhead)

100 tables (100 MB each)
    without checksum:  real 1m34.308s,  user 0m2.265s,  sys 0m14.717s
    SHA-256:           real 2m22.403s,  user 0m2.613s,  sys 0m20.776s  (51% overhead)
    MD5:               real 1m41.524s,  user 0m2.158s,  sys 0m15.949s  (8% overhead)
    CRC:               real 1m35.045s,  user 0m2.061s,  sys 0m16.308s  (1% overhead)

100 tables (1 GB each)
    without checksum:  real 17m18.336s, user 0m20.222s, sys 3m12.960s
    SHA-256:           real 24m45.942s, user 0m26.911s, sys 3m33.501s  (43% overhead)
    MD5:               real 17m41.670s, user 0m26.506s, sys 3m18.402s  (2% overhead)
    CRC:               real 17m22.296s, user 0m26.811s, sys 3m56.653s  (approx. 0.5% overhead;
                       sometimes this test completes in the same time as without checksums)
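For reference, a minimal sketch of how such a comparison can be reproduced
(the exact commands, paths, and options used in Suraj's runs are not shown
here, so treat this as illustrative only):

    # baseline: backup without manifest checksums
    time pg_basebackup -D /tmp/backup_plain -Ft

    # same backup with SHA-256 manifest checksums (option added by the attached patch)
    time pg_basebackup -D /tmp/backup_sha256 -Ft --manifest-with-checksums

The real/user/sys figures in the table are the kind of output produced by
time for each such run.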


Considering the above results, I modified Robert's earlier patch and added a
"manifest_with_checksums" option to pg_basebackup.  With the new patch,
checksums are disabled by default and are calculated only when the
"manifest_with_checksums" option is provided.  I have also rebased the whole
patch set.



Regards,

-- 
Rushabh Lathia
www.EnterpriseDB.com

On Tue, Oct 1, 2019 at 5:43 PM Robert Haas <robertmh...@gmail.com> wrote:

> On Mon, Sep 30, 2019 at 5:31 AM Jeevan Chalke
> <jeevan.cha...@enterprisedb.com> wrote:
> > Entry for directory is not added in manifest. So it might be difficult
> > at client to get to know about the directories. Will it be good to add
> > an entry for each directory too? May be like:
> > Dir    <dirname> <mtime>
>
> Well, what kind of corruption would this allow us to detect that we
> can't detect as things stand? I think the only case is an empty
> directory. If it's not empty, we'd have some entries for the files in
> that directory, and those files won't be able to exist unless the
> directory does. But, how would we end up backing up an empty
> directory, anyway?
>
> I don't really *mind* adding directories into the manifest, but I'm
> not sure how much it helps.
>
> --
> Robert Haas
> EnterpriseDB: http://www.enterprisedb.com
> The Enterprise PostgreSQL Company
>
>
>

-- 
Rushabh Lathia
From 75bc29edb4d84697901d257ac98514c186c29508 Mon Sep 17 00:00:00 2001
From: Rushabh Lathia <rushabh.lat...@enterprisedb.com>
Date: Wed, 13 Nov 2019 15:19:22 +0530
Subject: [PATCH] Reduce code duplication and eliminate weird macro tricks.

---
 src/bin/pg_basebackup/pg_basebackup.c | 1005 +++++++++++++++++----------------
 1 file changed, 507 insertions(+), 498 deletions(-)

diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c
index a9d162a..0565212 100644
--- a/src/bin/pg_basebackup/pg_basebackup.c
+++ b/src/bin/pg_basebackup/pg_basebackup.c
@@ -57,6 +57,40 @@ typedef struct TablespaceList
 	TablespaceListCell *tail;
 } TablespaceList;
 
+typedef struct WriteTarState
+{
+	int			tablespacenum;
+	char		filename[MAXPGPATH];
+	FILE	   *tarfile;
+	char		tarhdr[512];
+	bool		basetablespace;
+	bool		in_tarhdr;
+	bool		skip_file;
+	bool		is_recovery_guc_supported;
+	bool		is_postgresql_auto_conf;
+	bool		found_postgresql_auto_conf;
+	int			file_padding_len;
+	size_t		tarhdrsz;
+	pgoff_t		filesz;
+#ifdef HAVE_LIBZ
+	gzFile		ztarfile;
+#endif
+}			WriteTarState;
+
+typedef struct UnpackTarState
+{
+	int			tablespacenum;
+	char		current_path[MAXPGPATH];
+	char		filename[MAXPGPATH];
+	const char *mapped_tblspc_path;
+	pgoff_t		current_len_left;
+	int			current_padding;
+	FILE	   *file;
+}			UnpackTarState;
+
+typedef void (*WriteDataCallback) (size_t nbytes, char *buf,
+								   void *callback_data);
+
 /*
  * pg_xlog has been renamed to pg_wal in version 10.  This version number
  * should be compared with PQserverVersion().
@@ -142,7 +176,10 @@ static void verify_dir_is_empty_or_create(char *dirname, bool *created, bool *fo
 static void progress_report(int tablespacenum, const char *filename, bool force);
 
 static void ReceiveTarFile(PGconn *conn, PGresult *res, int rownum);
+static void ReceiveTarCopyChunk(size_t r, char *copybuf, void *callback_data);
 static void ReceiveAndUnpackTarFile(PGconn *conn, PGresult *res, int rownum);
+static void ReceiveTarAndUnpackCopyChunk(size_t r, char *copybuf,
+										 void *callback_data);
 static void BaseBackup(void);
 
 static bool reached_end_position(XLogRecPtr segendpos, uint32 timeline,
@@ -874,42 +911,78 @@ parse_max_rate(char *src)
 }
 
 /*
+ * Read a stream of COPY data and invoke the provided callback for each
+ * chunk.
+ */
+static void
+ReceiveCopyData(PGconn *conn, WriteDataCallback callback,
+				void *callback_data)
+{
+	PGresult   *res;
+
+	/* Get the COPY data stream. */
+	res = PQgetResult(conn);
+	if (PQresultStatus(res) != PGRES_COPY_OUT)
+	{
+		pg_log_error("could not get COPY data stream: %s",
+					 PQerrorMessage(conn));
+		exit(1);
+	}
+	PQclear(res);
+
+	/* Loop over chunks until done. */
+	while (1)
+	{
+		int			r;
+		char	   *copybuf;
+
+		r = PQgetCopyData(conn, &copybuf, 0);
+		if (r == -1)
+		{
+			/* End of chunk. */
+			break;
+		}
+		else if (r == -2)
+		{
+			pg_log_error("could not read COPY data: %s",
+						 PQerrorMessage(conn));
+			exit(1);
+		}
+
+		(*callback) (r, copybuf, callback_data);
+
+		PQfreemem(copybuf);
+	}
+}
+
+/*
  * Write a piece of tar data
  */
 static void
-writeTarData(
-#ifdef HAVE_LIBZ
-			 gzFile ztarfile,
-#endif
-			 FILE *tarfile, char *buf, int r, char *current_file)
+writeTarData(WriteTarState * state, char *buf, int r)
 {
 #ifdef HAVE_LIBZ
-	if (ztarfile != NULL)
+	if (state->ztarfile != NULL)
 	{
-		if (gzwrite(ztarfile, buf, r) != r)
+		if (gzwrite(state->ztarfile, buf, r) != r)
 		{
 			pg_log_error("could not write to compressed file \"%s\": %s",
-						 current_file, get_gz_error(ztarfile));
+						 state->filename, get_gz_error(state->ztarfile));
 			exit(1);
 		}
 	}
 	else
 #endif
 	{
-		if (fwrite(buf, r, 1, tarfile) != 1)
+		if (fwrite(buf, r, 1, state->tarfile) != 1)
 		{
-			pg_log_error("could not write to file \"%s\": %m", current_file);
+			pg_log_error("could not write to file \"%s\": %m",
+						 state->filename);
 			exit(1);
 		}
 	}
 }
 
-#ifdef HAVE_LIBZ
-#define WRITE_TAR_DATA(buf, sz) writeTarData(ztarfile, tarfile, buf, sz, filename)
-#else
-#define WRITE_TAR_DATA(buf, sz) writeTarData(tarfile, buf, sz, filename)
-#endif
-
 /*
  * Receive a tar format file from the connection to the server, and write
  * the data from this file directly into a tar file. If compression is
@@ -923,29 +996,19 @@ writeTarData(
 static void
 ReceiveTarFile(PGconn *conn, PGresult *res, int rownum)
 {
-	char		filename[MAXPGPATH];
-	char	   *copybuf = NULL;
-	FILE	   *tarfile = NULL;
-	char		tarhdr[512];
-	bool		basetablespace = PQgetisnull(res, rownum, 0);
-	bool		in_tarhdr = true;
-	bool		skip_file = false;
-	bool		is_recovery_guc_supported = true;
-	bool		is_postgresql_auto_conf = false;
-	bool		found_postgresql_auto_conf = false;
-	int			file_padding_len = 0;
-	size_t		tarhdrsz = 0;
-	pgoff_t		filesz = 0;
+	char		zerobuf[1024];
+	WriteTarState state;
 
-#ifdef HAVE_LIBZ
-	gzFile		ztarfile = NULL;
-#endif
+	memset(&state, 0, sizeof(state));
+	state.tablespacenum = rownum;
+	state.basetablespace = PQgetisnull(res, rownum, 0);
+	state.in_tarhdr = true;
 
 	/* recovery.conf is integrated into postgresql.conf in 12 and newer */
-	if (PQserverVersion(conn) < MINIMUM_VERSION_FOR_RECOVERY_GUC)
-		is_recovery_guc_supported = false;
+	if (PQserverVersion(conn) >= MINIMUM_VERSION_FOR_RECOVERY_GUC)
+		state.is_recovery_guc_supported = true;
 
-	if (basetablespace)
+	if (state.basetablespace)
 	{
 		/*
 		 * Base tablespaces
@@ -959,40 +1022,42 @@ ReceiveTarFile(PGconn *conn, PGresult *res, int rownum)
 #ifdef HAVE_LIBZ
 			if (compresslevel != 0)
 			{
-				ztarfile = gzdopen(dup(fileno(stdout)), "wb");
-				if (gzsetparams(ztarfile, compresslevel,
+				state.ztarfile = gzdopen(dup(fileno(stdout)), "wb");
+				if (gzsetparams(state.ztarfile, compresslevel,
 								Z_DEFAULT_STRATEGY) != Z_OK)
 				{
 					pg_log_error("could not set compression level %d: %s",
-								 compresslevel, get_gz_error(ztarfile));
+								 compresslevel, get_gz_error(state.ztarfile));
 					exit(1);
 				}
 			}
 			else
 #endif
-				tarfile = stdout;
-			strcpy(filename, "-");
+				state.tarfile = stdout;
+			strcpy(state.filename, "-");
 		}
 		else
 		{
 #ifdef HAVE_LIBZ
 			if (compresslevel != 0)
 			{
-				snprintf(filename, sizeof(filename), "%s/base.tar.gz", basedir);
-				ztarfile = gzopen(filename, "wb");
-				if (gzsetparams(ztarfile, compresslevel,
+				snprintf(state.filename, sizeof(state.filename),
+						 "%s/base.tar.gz", basedir);
+				state.ztarfile = gzopen(state.filename, "wb");
+				if (gzsetparams(state.ztarfile, compresslevel,
 								Z_DEFAULT_STRATEGY) != Z_OK)
 				{
 					pg_log_error("could not set compression level %d: %s",
-								 compresslevel, get_gz_error(ztarfile));
+								 compresslevel, get_gz_error(state.ztarfile));
 					exit(1);
 				}
 			}
 			else
 #endif
 			{
-				snprintf(filename, sizeof(filename), "%s/base.tar", basedir);
-				tarfile = fopen(filename, "wb");
+				snprintf(state.filename, sizeof(state.filename),
+						 "%s/base.tar", basedir);
+				state.tarfile = fopen(state.filename, "wb");
 			}
 		}
 	}
@@ -1004,34 +1069,35 @@ ReceiveTarFile(PGconn *conn, PGresult *res, int rownum)
 #ifdef HAVE_LIBZ
 		if (compresslevel != 0)
 		{
-			snprintf(filename, sizeof(filename), "%s/%s.tar.gz", basedir,
-					 PQgetvalue(res, rownum, 0));
-			ztarfile = gzopen(filename, "wb");
-			if (gzsetparams(ztarfile, compresslevel,
+			snprintf(state.filename, sizeof(state.filename),
+					 "%s/%s.tar.gz",
+					 basedir, PQgetvalue(res, rownum, 0));
+			state.ztarfile = gzopen(state.filename, "wb");
+			if (gzsetparams(state.ztarfile, compresslevel,
 							Z_DEFAULT_STRATEGY) != Z_OK)
 			{
 				pg_log_error("could not set compression level %d: %s",
-							 compresslevel, get_gz_error(ztarfile));
+							 compresslevel, get_gz_error(state.ztarfile));
 				exit(1);
 			}
 		}
 		else
 #endif
 		{
-			snprintf(filename, sizeof(filename), "%s/%s.tar", basedir,
-					 PQgetvalue(res, rownum, 0));
-			tarfile = fopen(filename, "wb");
+			snprintf(state.filename, sizeof(state.filename), "%s/%s.tar",
+					 basedir, PQgetvalue(res, rownum, 0));
+			state.tarfile = fopen(state.filename, "wb");
 		}
 	}
 
 #ifdef HAVE_LIBZ
 	if (compresslevel != 0)
 	{
-		if (!ztarfile)
+		if (!state.ztarfile)
 		{
 			/* Compression is in use */
 			pg_log_error("could not create compressed file \"%s\": %s",
-						 filename, get_gz_error(ztarfile));
+						 state.filename, get_gz_error(state.ztarfile));
 			exit(1);
 		}
 	}
@@ -1039,314 +1105,292 @@ ReceiveTarFile(PGconn *conn, PGresult *res, int rownum)
 #endif
 	{
 		/* Either no zlib support, or zlib support but compresslevel = 0 */
-		if (!tarfile)
+		if (!state.tarfile)
 		{
-			pg_log_error("could not create file \"%s\": %m", filename);
+			pg_log_error("could not create file \"%s\": %m", state.filename);
 			exit(1);
 		}
 	}
 
+	ReceiveCopyData(conn, ReceiveTarCopyChunk, &state);
+
 	/*
-	 * Get the COPY data stream
+	 * End of copy data. If requested, and this is the base tablespace, write
+	 * configuration file into the tarfile. When done, close the file (but not
+	 * stdout).
+	 *
+	 * Also, write two completely empty blocks at the end of the tar file, as
+	 * required by some tar programs.
 	 */
-	res = PQgetResult(conn);
-	if (PQresultStatus(res) != PGRES_COPY_OUT)
-	{
-		pg_log_error("could not get COPY data stream: %s",
-					 PQerrorMessage(conn));
-		exit(1);
-	}
 
-	while (1)
+	MemSet(zerobuf, 0, sizeof(zerobuf));
+
+	if (state.basetablespace && writerecoveryconf)
 	{
-		int			r;
+		char		header[512];
 
-		if (copybuf != NULL)
+		/*
+		 * If postgresql.auto.conf has not been found in the streamed data,
+		 * add recovery configuration to postgresql.auto.conf if recovery
+		 * parameters are GUCs.  If the instance connected to is older than
+		 * 12, create recovery.conf with this data otherwise.
+		 */
+		if (!state.found_postgresql_auto_conf || !state.is_recovery_guc_supported)
 		{
-			PQfreemem(copybuf);
-			copybuf = NULL;
+			int			padding;
+
+			tarCreateHeader(header,
+							state.is_recovery_guc_supported ? "postgresql.auto.conf" : "recovery.conf",
+							NULL,
+							recoveryconfcontents->len,
+							pg_file_create_mode, 04000, 02000,
+							time(NULL));
+
+			padding = ((recoveryconfcontents->len + 511) & ~511) - recoveryconfcontents->len;
+
+			writeTarData(&state, header, sizeof(header));
+			writeTarData(&state, recoveryconfcontents->data,
+						 recoveryconfcontents->len);
+			if (padding)
+				writeTarData(&state, zerobuf, padding);
 		}
 
-		r = PQgetCopyData(conn, &copybuf, 0);
-		if (r == -1)
+		/*
+		 * standby.signal is supported only if recovery parameters are GUCs.
+		 */
+		if (state.is_recovery_guc_supported)
 		{
-			/*
-			 * End of chunk. If requested, and this is the base tablespace,
-			 * write configuration file into the tarfile. When done, close the
-			 * file (but not stdout).
-			 *
-			 * Also, write two completely empty blocks at the end of the tar
-			 * file, as required by some tar programs.
-			 */
-			char		zerobuf[1024];
+			tarCreateHeader(header, "standby.signal", NULL,
+							0,	/* zero-length file */
+							pg_file_create_mode, 04000, 02000,
+							time(NULL));
+
+			writeTarData(&state, header, sizeof(header));
+			writeTarData(&state, zerobuf, 511);
+		}
+	}
 
-			MemSet(zerobuf, 0, sizeof(zerobuf));
+	/* 2 * 512 bytes empty data at end of file */
+	writeTarData(&state, zerobuf, sizeof(zerobuf));
 
-			if (basetablespace && writerecoveryconf)
+#ifdef HAVE_LIBZ
+	if (state.ztarfile != NULL)
+	{
+		if (gzclose(state.ztarfile) != 0)
+		{
+			pg_log_error("could not close compressed file \"%s\": %s",
+						 state.filename, get_gz_error(state.ztarfile));
+			exit(1);
+		}
+	}
+	else
+#endif
+	{
+		if (strcmp(basedir, "-") != 0)
+		{
+			if (fclose(state.tarfile) != 0)
 			{
-				char		header[512];
+				pg_log_error("could not close file \"%s\": %m",
+							 state.filename);
+				exit(1);
+			}
+		}
+	}
 
-				/*
-				 * If postgresql.auto.conf has not been found in the streamed
-				 * data, add recovery configuration to postgresql.auto.conf if
-				 * recovery parameters are GUCs.  If the instance connected to
-				 * is older than 12, create recovery.conf with this data
-				 * otherwise.
-				 */
-				if (!found_postgresql_auto_conf || !is_recovery_guc_supported)
-				{
-					int			padding;
+	progress_report(rownum, state.filename, true);
 
-					tarCreateHeader(header,
-									is_recovery_guc_supported ? "postgresql.auto.conf" : "recovery.conf",
-									NULL,
-									recoveryconfcontents->len,
-									pg_file_create_mode, 04000, 02000,
-									time(NULL));
+	/*
+	 * Do not sync the resulting tar file yet, all files are synced once at
+	 * the end.
+	 */
+}
 
-					padding = ((recoveryconfcontents->len + 511) & ~511) - recoveryconfcontents->len;
+/*
+ * Receive one chunk of tar-format data from the server.
+ */
+static void
+ReceiveTarCopyChunk(size_t r, char *copybuf, void *callback_data)
+{
+	WriteTarState *state = callback_data;
 
-					WRITE_TAR_DATA(header, sizeof(header));
-					WRITE_TAR_DATA(recoveryconfcontents->data,
-								   recoveryconfcontents->len);
-					if (padding)
-						WRITE_TAR_DATA(zerobuf, padding);
-				}
+	if (!writerecoveryconf || !state->basetablespace)
+	{
+		/*
+		 * When not writing config file, or when not working on the base
+		 * tablespace, we never have to look for an existing configuration
+		 * file in the stream.
+		 */
+		writeTarData(state, copybuf, r);
+	}
+	else
+	{
+		/*
+		 * Look for a config file in the existing tar stream. If it's there,
+		 * we must skip it so we can later overwrite it with our own version
+		 * of the file.
+		 *
+		 * To do this, we have to process the individual files inside the TAR
+		 * stream. The stream consists of a header and zero or more chunks,
+		 * all 512 bytes long. The stream from the server is broken up into
+		 * smaller pieces, so we have to track the size of the files to find
+		 * the next header structure.
+		 */
+		int			rr = r;
+		int			pos = 0;
 
+		while (rr > 0)
+		{
+			if (state->in_tarhdr)
+			{
 				/*
-				 * standby.signal is supported only if recovery parameters are
-				 * GUCs.
+				 * We're currently reading a header structure inside the TAR
+				 * stream, i.e. the file metadata.
 				 */
-				if (is_recovery_guc_supported)
+				if (state->tarhdrsz < 512)
 				{
-					tarCreateHeader(header, "standby.signal", NULL,
-									0,	/* zero-length file */
-									pg_file_create_mode, 04000, 02000,
-									time(NULL));
+					/*
+					 * Copy the header structure into tarhdr in case the
+					 * header is not aligned to 512 bytes or it's not returned
+					 * in whole by the last PQgetCopyData call.
+					 */
+					int			hdrleft;
+					int			bytes2copy;
 
-					WRITE_TAR_DATA(header, sizeof(header));
-					WRITE_TAR_DATA(zerobuf, 511);
-				}
-			}
+					hdrleft = 512 - state->tarhdrsz;
+					bytes2copy = (rr > hdrleft ? hdrleft : rr);
 
-			/* 2 * 512 bytes empty data at end of file */
-			WRITE_TAR_DATA(zerobuf, sizeof(zerobuf));
+					memcpy(&state->tarhdr[state->tarhdrsz], copybuf + pos,
+						   bytes2copy);
 
-#ifdef HAVE_LIBZ
-			if (ztarfile != NULL)
-			{
-				if (gzclose(ztarfile) != 0)
-				{
-					pg_log_error("could not close compressed file \"%s\": %s",
-								 filename, get_gz_error(ztarfile));
-					exit(1);
+					rr -= bytes2copy;
+					pos += bytes2copy;
+					state->tarhdrsz += bytes2copy;
 				}
-			}
-			else
-#endif
-			{
-				if (strcmp(basedir, "-") != 0)
-				{
-					if (fclose(tarfile) != 0)
-					{
-						pg_log_error("could not close file \"%s\": %m",
-									 filename);
-						exit(1);
-					}
-				}
-			}
-
-			break;
-		}
-		else if (r == -2)
-		{
-			pg_log_error("could not read COPY data: %s",
-						 PQerrorMessage(conn));
-			exit(1);
-		}
-
-		if (!writerecoveryconf || !basetablespace)
-		{
-			/*
-			 * When not writing config file, or when not working on the base
-			 * tablespace, we never have to look for an existing configuration
-			 * file in the stream.
-			 */
-			WRITE_TAR_DATA(copybuf, r);
-		}
-		else
-		{
-			/*
-			 * Look for a config file in the existing tar stream. If it's
-			 * there, we must skip it so we can later overwrite it with our
-			 * own version of the file.
-			 *
-			 * To do this, we have to process the individual files inside the
-			 * TAR stream. The stream consists of a header and zero or more
-			 * chunks, all 512 bytes long. The stream from the server is
-			 * broken up into smaller pieces, so we have to track the size of
-			 * the files to find the next header structure.
-			 */
-			int			rr = r;
-			int			pos = 0;
-
-			while (rr > 0)
-			{
-				if (in_tarhdr)
+				else
 				{
 					/*
-					 * We're currently reading a header structure inside the
-					 * TAR stream, i.e. the file metadata.
+					 * We have the complete header structure in tarhdr, look
+					 * at the file metadata: we may want append recovery info
+					 * into postgresql.auto.conf and skip standby.signal file
+					 * if recovery parameters are integrated as GUCs, and
+					 * recovery.conf otherwise. In both cases we must
+					 * calculate tar padding.
 					 */
-					if (tarhdrsz < 512)
+					if (state->is_recovery_guc_supported)
 					{
-						/*
-						 * Copy the header structure into tarhdr in case the
-						 * header is not aligned to 512 bytes or it's not
-						 * returned in whole by the last PQgetCopyData call.
-						 */
-						int			hdrleft;
-						int			bytes2copy;
-
-						hdrleft = 512 - tarhdrsz;
-						bytes2copy = (rr > hdrleft ? hdrleft : rr);
-
-						memcpy(&tarhdr[tarhdrsz], copybuf + pos, bytes2copy);
-
-						rr -= bytes2copy;
-						pos += bytes2copy;
-						tarhdrsz += bytes2copy;
+						state->skip_file =
+							(strcmp(&state->tarhdr[0], "standby.signal") == 0);
+						state->is_postgresql_auto_conf =
+							(strcmp(&state->tarhdr[0], "postgresql.auto.conf") == 0);
 					}
 					else
-					{
-						/*
-						 * We have the complete header structure in tarhdr,
-						 * look at the file metadata: we may want append
-						 * recovery info into postgresql.auto.conf and skip
-						 * standby.signal file if recovery parameters are
-						 * integrated as GUCs, and recovery.conf otherwise. In
-						 * both cases we must calculate tar padding.
-						 */
-						if (is_recovery_guc_supported)
-						{
-							skip_file = (strcmp(&tarhdr[0], "standby.signal") == 0);
-							is_postgresql_auto_conf = (strcmp(&tarhdr[0], "postgresql.auto.conf") == 0);
-						}
-						else
-							skip_file = (strcmp(&tarhdr[0], "recovery.conf") == 0);
+						state->skip_file =
+							(strcmp(&state->tarhdr[0], "recovery.conf") == 0);
 
-						filesz = read_tar_number(&tarhdr[124], 12);
-						file_padding_len = ((filesz + 511) & ~511) - filesz;
+					state->filesz = read_tar_number(&state->tarhdr[124], 12);
+					state->file_padding_len =
+						((state->filesz + 511) & ~511) - state->filesz;
 
-						if (is_recovery_guc_supported &&
-							is_postgresql_auto_conf &&
-							writerecoveryconf)
-						{
-							/* replace tar header */
-							char		header[512];
+					if (state->is_recovery_guc_supported &&
+						state->is_postgresql_auto_conf &&
+						writerecoveryconf)
+					{
+						/* replace tar header */
+						char		header[512];
 
-							tarCreateHeader(header, "postgresql.auto.conf", NULL,
-											filesz + recoveryconfcontents->len,
-											pg_file_create_mode, 04000, 02000,
-											time(NULL));
+						tarCreateHeader(header, "postgresql.auto.conf", NULL,
+										state->filesz + recoveryconfcontents->len,
+										pg_file_create_mode, 04000, 02000,
+										time(NULL));
 
-							WRITE_TAR_DATA(header, sizeof(header));
-						}
-						else
+						writeTarData(state, header, sizeof(header));
+					}
+					else
+					{
+						/* copy stream with padding */
+						state->filesz += state->file_padding_len;
+
+						if (!state->skip_file)
 						{
-							/* copy stream with padding */
-							filesz += file_padding_len;
-
-							if (!skip_file)
-							{
-								/*
-								 * If we're not skipping the file, write the
-								 * tar header unmodified.
-								 */
-								WRITE_TAR_DATA(tarhdr, 512);
-							}
+							/*
+							 * If we're not skipping the file, write the tar
+							 * header unmodified.
+							 */
+							writeTarData(state, state->tarhdr, 512);
 						}
-
-						/* Next part is the file, not the header */
-						in_tarhdr = false;
 					}
+
+					/* Next part is the file, not the header */
+					state->in_tarhdr = false;
 				}
-				else
+			}
+			else
+			{
+				/*
+				 * We're processing a file's contents.
+				 */
+				if (state->filesz > 0)
 				{
 					/*
-					 * We're processing a file's contents.
+					 * We still have data to read (and possibly write).
 					 */
-					if (filesz > 0)
-					{
-						/*
-						 * We still have data to read (and possibly write).
-						 */
-						int			bytes2write;
+					int			bytes2write;
 
-						bytes2write = (filesz > rr ? rr : filesz);
+					bytes2write = (state->filesz > rr ? rr : state->filesz);
 
-						if (!skip_file)
-							WRITE_TAR_DATA(copybuf + pos, bytes2write);
+					if (!state->skip_file)
+						writeTarData(state, copybuf + pos, bytes2write);
 
-						rr -= bytes2write;
-						pos += bytes2write;
-						filesz -= bytes2write;
-					}
-					else if (is_recovery_guc_supported &&
-							 is_postgresql_auto_conf &&
-							 writerecoveryconf)
-					{
-						/* append recovery config to postgresql.auto.conf */
-						int			padding;
-						int			tailsize;
+					rr -= bytes2write;
+					pos += bytes2write;
+					state->filesz -= bytes2write;
+				}
+				else if (state->is_recovery_guc_supported &&
+						 state->is_postgresql_auto_conf &&
+						 writerecoveryconf)
+				{
+					/* append recovery config to postgresql.auto.conf */
+					int			padding;
+					int			tailsize;
 
-						tailsize = (512 - file_padding_len) + recoveryconfcontents->len;
-						padding = ((tailsize + 511) & ~511) - tailsize;
+					tailsize = (512 - state->file_padding_len) + recoveryconfcontents->len;
+					padding = ((tailsize + 511) & ~511) - tailsize;
 
-						WRITE_TAR_DATA(recoveryconfcontents->data, recoveryconfcontents->len);
+					writeTarData(state, recoveryconfcontents->data,
+								 recoveryconfcontents->len);
 
-						if (padding)
-						{
-							char		zerobuf[512];
+					if (padding)
+					{
+						char		zerobuf[512];
 
-							MemSet(zerobuf, 0, sizeof(zerobuf));
-							WRITE_TAR_DATA(zerobuf, padding);
-						}
+						MemSet(zerobuf, 0, sizeof(zerobuf));
+						writeTarData(state, zerobuf, padding);
+					}
 
-						/* skip original file padding */
-						is_postgresql_auto_conf = false;
-						skip_file = true;
-						filesz += file_padding_len;
+					/* skip original file padding */
+					state->is_postgresql_auto_conf = false;
+					state->skip_file = true;
+					state->filesz += state->file_padding_len;
 
-						found_postgresql_auto_conf = true;
-					}
-					else
-					{
-						/*
-						 * No more data in the current file, the next piece of
-						 * data (if any) will be a new file header structure.
-						 */
-						in_tarhdr = true;
-						skip_file = false;
-						is_postgresql_auto_conf = false;
-						tarhdrsz = 0;
-						filesz = 0;
-					}
+					state->found_postgresql_auto_conf = true;
+				}
+				else
+				{
+					/*
+					 * No more data in the current file, the next piece of
+					 * data (if any) will be a new file header structure.
+					 */
+					state->in_tarhdr = true;
+					state->skip_file = false;
+					state->is_postgresql_auto_conf = false;
+					state->tarhdrsz = 0;
+					state->filesz = 0;
 				}
 			}
 		}
-		totaldone += r;
-		progress_report(rownum, filename, false);
-	}							/* while (1) */
-	progress_report(rownum, filename, true);
-
-	if (copybuf != NULL)
-		PQfreemem(copybuf);
-
-	/*
-	 * Do not sync the resulting tar file yet, all files are synced once at
-	 * the end.
-	 */
+	}
+	totaldone += r;
+	progress_report(state->tablespacenum, state->filename, false);
 }
 
 
@@ -1384,254 +1428,219 @@ get_tablespace_mapping(const char *dir)
 static void
 ReceiveAndUnpackTarFile(PGconn *conn, PGresult *res, int rownum)
 {
-	char		current_path[MAXPGPATH];
-	char		filename[MAXPGPATH];
-	const char *mapped_tblspc_path;
-	pgoff_t		current_len_left = 0;
-	int			current_padding = 0;
+	UnpackTarState state;
 	bool		basetablespace;
-	char	   *copybuf = NULL;
-	FILE	   *file = NULL;
+
+	memset(&state, 0, sizeof(state));
+	state.tablespacenum = rownum;
 
 	basetablespace = PQgetisnull(res, rownum, 0);
 	if (basetablespace)
-		strlcpy(current_path, basedir, sizeof(current_path));
+		strlcpy(state.current_path, basedir, sizeof(state.current_path));
 	else
-		strlcpy(current_path,
+		strlcpy(state.current_path,
 				get_tablespace_mapping(PQgetvalue(res, rownum, 1)),
-				sizeof(current_path));
+				sizeof(state.current_path));
 
-	/*
-	 * Get the COPY data
-	 */
-	res = PQgetResult(conn);
-	if (PQresultStatus(res) != PGRES_COPY_OUT)
+	ReceiveCopyData(conn, ReceiveTarAndUnpackCopyChunk, &state);
+
+
+	if (state.file)
+		fclose(state.file);
+
+	progress_report(rownum, state.filename, true);
+
+	if (state.file != NULL)
 	{
-		pg_log_error("could not get COPY data stream: %s",
-					 PQerrorMessage(conn));
+		pg_log_error("COPY stream ended before last file was finished");
 		exit(1);
 	}
 
-	while (1)
-	{
-		int			r;
+	if (basetablespace && writerecoveryconf)
+		WriteRecoveryConfig(conn, basedir, recoveryconfcontents);
 
-		if (copybuf != NULL)
-		{
-			PQfreemem(copybuf);
-			copybuf = NULL;
-		}
+	/*
+	 * No data is synced here, everything is done for all tablespaces at the
+	 * end.
+	 */
+}
 
-		r = PQgetCopyData(conn, &copybuf, 0);
+static void
+ReceiveTarAndUnpackCopyChunk(size_t r, char *copybuf, void *callback_data)
+{
+	UnpackTarState *state = callback_data;
 
-		if (r == -1)
-		{
-			/*
-			 * End of chunk
-			 */
-			if (file)
-				fclose(file);
+	if (state->file == NULL)
+	{
+#ifndef WIN32
+		int			filemode;
+#endif
 
-			break;
-		}
-		else if (r == -2)
+		/*
+		 * No current file, so this must be the header for a new file
+		 */
+		if (r != 512)
 		{
-			pg_log_error("could not read COPY data: %s",
-						 PQerrorMessage(conn));
+			pg_log_error("invalid tar block header size: %zu", r);
 			exit(1);
 		}
+		totaldone += 512;
 
-		if (file == NULL)
-		{
-#ifndef WIN32
-			int			filemode;
-#endif
-
-			/*
-			 * No current file, so this must be the header for a new file
-			 */
-			if (r != 512)
-			{
-				pg_log_error("invalid tar block header size: %d", r);
-				exit(1);
-			}
-			totaldone += 512;
-
-			current_len_left = read_tar_number(&copybuf[124], 12);
+		state->current_len_left = read_tar_number(&copybuf[124], 12);
 
 #ifndef WIN32
-			/* Set permissions on the file */
-			filemode = read_tar_number(&copybuf[100], 8);
+		/* Set permissions on the file */
+		filemode = read_tar_number(&copybuf[100], 8);
 #endif
 
-			/*
-			 * All files are padded up to 512 bytes
-			 */
-			current_padding =
-				((current_len_left + 511) & ~511) - current_len_left;
+		/*
+		 * All files are padded up to 512 bytes
+		 */
+		state->current_padding =
+			((state->current_len_left + 511) & ~511) - state->current_len_left;
 
+		/*
+		 * First part of header is zero terminated filename
+		 */
+		snprintf(state->filename, sizeof(state->filename),
+				 "%s/%s", state->current_path, copybuf);
+		if (state->filename[strlen(state->filename) - 1] == '/')
+		{
 			/*
-			 * First part of header is zero terminated filename
+			 * Ends in a slash means directory or symlink to directory
 			 */
-			snprintf(filename, sizeof(filename), "%s/%s", current_path,
-					 copybuf);
-			if (filename[strlen(filename) - 1] == '/')
+			if (copybuf[156] == '5')
 			{
 				/*
-				 * Ends in a slash means directory or symlink to directory
+				 * Directory. Remove trailing slash first.
 				 */
-				if (copybuf[156] == '5')
+				state->filename[strlen(state->filename) - 1] = '\0';
+				if (mkdir(state->filename, pg_dir_create_mode) != 0)
 				{
 					/*
-					 * Directory
+					 * When streaming WAL, pg_wal (or pg_xlog for pre-9.6
+					 * clusters) will have been created by the wal receiver
+					 * process. Also, when the WAL directory location was
+					 * specified, pg_wal (or pg_xlog) has already been created
+					 * as a symbolic link before starting the actual backup.
+					 * So just ignore creation failures on related
+					 * directories.
 					 */
-					filename[strlen(filename) - 1] = '\0';	/* Remove trailing slash */
-					if (mkdir(filename, pg_dir_create_mode) != 0)
+					if (!((pg_str_endswith(state->filename, "/pg_wal") ||
+						   pg_str_endswith(state->filename, "/pg_xlog") ||
+						   pg_str_endswith(state->filename, "/archive_status")) &&
+						  errno == EEXIST))
 					{
-						/*
-						 * When streaming WAL, pg_wal (or pg_xlog for pre-9.6
-						 * clusters) will have been created by the wal
-						 * receiver process. Also, when the WAL directory
-						 * location was specified, pg_wal (or pg_xlog) has
-						 * already been created as a symbolic link before
-						 * starting the actual backup. So just ignore creation
-						 * failures on related directories.
-						 */
-						if (!((pg_str_endswith(filename, "/pg_wal") ||
-							   pg_str_endswith(filename, "/pg_xlog") ||
-							   pg_str_endswith(filename, "/archive_status")) &&
-							  errno == EEXIST))
-						{
-							pg_log_error("could not create directory \"%s\": %m",
-										 filename);
-							exit(1);
-						}
+						pg_log_error("could not create directory \"%s\": %m",
+									 state->filename);
+						exit(1);
 					}
+				}
 #ifndef WIN32
-					if (chmod(filename, (mode_t) filemode))
-						pg_log_error("could not set permissions on directory \"%s\": %m",
-									 filename);
+				if (chmod(state->filename, (mode_t) filemode))
+					pg_log_error("could not set permissions on directory \"%s\": %m",
+								 state->filename);
 #endif
-				}
-				else if (copybuf[156] == '2')
-				{
-					/*
-					 * Symbolic link
-					 *
-					 * It's most likely a link in pg_tblspc directory, to the
-					 * location of a tablespace. Apply any tablespace mapping
-					 * given on the command line (--tablespace-mapping). (We
-					 * blindly apply the mapping without checking that the
-					 * link really is inside pg_tblspc. We don't expect there
-					 * to be other symlinks in a data directory, but if there
-					 * are, you can call it an undocumented feature that you
-					 * can map them too.)
-					 */
-					filename[strlen(filename) - 1] = '\0';	/* Remove trailing slash */
+			}
+			else if (copybuf[156] == '2')
+			{
+				/*
+				 * Symbolic link
+				 *
+				 * It's most likely a link in pg_tblspc directory, to the
+				 * location of a tablespace. Apply any tablespace mapping
+				 * given on the command line (--tablespace-mapping). (We
+				 * blindly apply the mapping without checking that the link
+				 * really is inside pg_tblspc. We don't expect there to be
+				 * other symlinks in a data directory, but if there are, you
+				 * can call it an undocumented feature that you can map them
+				 * too.)
+				 */
+				state->filename[strlen(state->filename) - 1] = '\0';	/* Remove trailing slash */
 
-					mapped_tblspc_path = get_tablespace_mapping(&copybuf[157]);
-					if (symlink(mapped_tblspc_path, filename) != 0)
-					{
-						pg_log_error("could not create symbolic link from \"%s\" to \"%s\": %m",
-									 filename, mapped_tblspc_path);
-						exit(1);
-					}
-				}
-				else
+				state->mapped_tblspc_path =
+					get_tablespace_mapping(&copybuf[157]);
+				if (symlink(state->mapped_tblspc_path, state->filename) != 0)
 				{
-					pg_log_error("unrecognized link indicator \"%c\"",
-								 copybuf[156]);
+					pg_log_error("could not create symbolic link from \"%s\" to \"%s\": %m",
+								 state->filename, state->mapped_tblspc_path);
 					exit(1);
 				}
-				continue;		/* directory or link handled */
 			}
-
-			/*
-			 * regular file
-			 */
-			file = fopen(filename, "wb");
-			if (!file)
+			else
 			{
-				pg_log_error("could not create file \"%s\": %m", filename);
+				pg_log_error("unrecognized link indicator \"%c\"",
+							 copybuf[156]);
 				exit(1);
 			}
+			return;				/* directory or link handled */
+		}
+
+		/*
+		 * regular file
+		 */
+		state->file = fopen(state->filename, "wb");
+		if (!state->file)
+		{
+			pg_log_error("could not create file \"%s\": %m", state->filename);
+			exit(1);
+		}
 
 #ifndef WIN32
-			if (chmod(filename, (mode_t) filemode))
-				pg_log_error("could not set permissions on file \"%s\": %m",
-							 filename);
+		if (chmod(state->filename, (mode_t) filemode))
+			pg_log_error("could not set permissions on file \"%s\": %m",
+						 state->filename);
 #endif
 
-			if (current_len_left == 0)
-			{
-				/*
-				 * Done with this file, next one will be a new tar header
-				 */
-				fclose(file);
-				file = NULL;
-				continue;
-			}
-		}						/* new file */
-		else
+		if (state->current_len_left == 0)
 		{
 			/*
-			 * Continuing blocks in existing file
+			 * Done with this file, next one will be a new tar header
 			 */
-			if (current_len_left == 0 && r == current_padding)
-			{
-				/*
-				 * Received the padding block for this file, ignore it and
-				 * close the file, then move on to the next tar header.
-				 */
-				fclose(file);
-				file = NULL;
-				totaldone += r;
-				continue;
-			}
-
-			if (fwrite(copybuf, r, 1, file) != 1)
-			{
-				pg_log_error("could not write to file \"%s\": %m", filename);
-				exit(1);
-			}
-			totaldone += r;
-			progress_report(rownum, filename, false);
-
-			current_len_left -= r;
-			if (current_len_left == 0 && current_padding == 0)
-			{
-				/*
-				 * Received the last block, and there is no padding to be
-				 * expected. Close the file and move on to the next tar
-				 * header.
-				 */
-				fclose(file);
-				file = NULL;
-				continue;
-			}
-		}						/* continuing data in existing file */
-	}							/* loop over all data blocks */
-	progress_report(rownum, filename, true);
-
-	if (file != NULL)
+			fclose(state->file);
+			state->file = NULL;
+			return;
+		}
+	}							/* new file */
+	else
 	{
-		pg_log_error("COPY stream ended before last file was finished");
-		exit(1);
-	}
-
-	if (copybuf != NULL)
-		PQfreemem(copybuf);
+		/*
+		 * Continuing blocks in existing file
+		 */
+		if (state->current_len_left == 0 && r == state->current_padding)
+		{
+			/*
+			 * Received the padding block for this file, ignore it and close
+			 * the file, then move on to the next tar header.
+			 */
+			fclose(state->file);
+			state->file = NULL;
+			totaldone += r;
+			return;
+		}
 
-	if (basetablespace && writerecoveryconf)
-		WriteRecoveryConfig(conn, basedir, recoveryconfcontents);
+		if (fwrite(copybuf, r, 1, state->file) != 1)
+		{
+			pg_log_error("could not write to file \"%s\": %m", state->filename);
+			exit(1);
+		}
+		totaldone += r;
+		progress_report(state->tablespacenum, state->filename, false);
 
-	/*
-	 * No data is synced here, everything is done for all tablespaces at the
-	 * end.
-	 */
+		state->current_len_left -= r;
+		if (state->current_len_left == 0 && state->current_padding == 0)
+		{
+			/*
+			 * Received the last block, and there is no padding to be
+			 * expected. Close the file and move on to the next tar header.
+			 */
+			fclose(state->file);
+			state->file = NULL;
+			return;
+		}
+	}							/* continuing data in existing file */
 }
 
-
 static void
 BaseBackup(void)
 {
-- 
1.8.3.1

From c0bb04d4626da331813603627c17c5f30075713a Mon Sep 17 00:00:00 2001
From: Rushabh Lathia <rushabh.lat...@enterprisedb.com>
Date: Tue, 19 Nov 2019 14:42:39 +0530
Subject: [PATCH] Make checksum optional in pg_basebackup.

---
 src/backend/replication/basebackup.c   | 50 ++++++++++++++++++++++++++--------
 src/backend/replication/repl_gram.y    |  6 ++++
 src/backend/replication/repl_scanner.l |  1 +
 src/bin/pg_basebackup/pg_basebackup.c  | 12 ++++++--
 4 files changed, 55 insertions(+), 14 deletions(-)

diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c
index 273f837..a98d949 100644
--- a/src/backend/replication/basebackup.c
+++ b/src/backend/replication/basebackup.c
@@ -132,6 +132,9 @@ static long long int total_checksum_failures;
 /* Do not verify checksums. */
 static bool noverify_checksums = false;
 
+/* Add file entry in to manifest with checksums. */
+static bool manifest_with_checksums = false;
+
 /*
  * The contents of these directories are removed or recreated during server
  * start so they are not included in backups.  The directories themselves are
@@ -674,6 +677,7 @@ parse_basebackup_options(List *options, basebackup_options *opt)
 	bool		o_maxrate = false;
 	bool		o_tablespace_map = false;
 	bool		o_noverify_checksums = false;
+	bool		o_manifest_with_checksums = false;
 
 	MemSet(opt, 0, sizeof(*opt));
 	foreach(lopt, options)
@@ -762,6 +766,16 @@ parse_basebackup_options(List *options, basebackup_options *opt)
 			noverify_checksums = true;
 			o_noverify_checksums = true;
 		}
+		else if (strcmp(defel->defname, "manifest_with_checksums") == 0)
+		{
+			if (o_manifest_with_checksums)
+				ereport(ERROR,
+						(errcode(ERRCODE_SYNTAX_ERROR),
+						 errmsg("duplicate option \"%s\"", defel->defname)));
+			manifest_with_checksums = true;
+			o_manifest_with_checksums = true;
+		}
+
 		else
 			elog(ERROR, "option \"%s\" not recognized",
 				 defel->defname);
@@ -930,15 +944,18 @@ AddFileToManifest(StringInfo manifest, const char *tsoid,
 	pg_strftime(timebuf, sizeof(timebuf), "%Y-%m-%d %H:%M:%S %Z", tm);
 
 	/* Convert checksum to hexadecimal. */
-	shatextlen =
-		hex_encode((char *) shabuf, PG_SHA256_DIGEST_LENGTH, shatextbuf);
-	Assert(shatextlen + 1 == sizeof(shatextbuf));
-	shatextbuf[shatextlen] = '\0';
+	if (manifest_with_checksums)
+	{
+		shatextlen =
+			hex_encode((char *) shabuf, PG_SHA256_DIGEST_LENGTH, shatextbuf);
+		Assert(shatextlen + 1 == sizeof(shatextbuf));
+		shatextbuf[shatextlen] = '\0';
+	}
 
 	/* Add to manifest. */
 	appendStringInfo(manifest, "File\t%s\t%zu\t%s\t%s\n",
 					 escaped_filename == NULL ? filename : escaped_filename,
-					 size, timebuf, shatextbuf);
+					 size, timebuf, manifest_with_checksums ? shatextbuf : "-");
 
 	/* Avoid leaking memory. */
 	if (escaped_filename != NULL)
@@ -1100,7 +1117,8 @@ sendFileWithContent(const char *filename, const char *content,
 	pg_sha256_ctx	sha256_ctx;
 	uint8		shabuf[PG_SHA256_DIGEST_LENGTH];
 
-	pg_sha256_init(&sha256_ctx);
+	if (manifest_with_checksums)
+		pg_sha256_init(&sha256_ctx);
 
 	len = strlen(content);
 
@@ -1134,8 +1152,12 @@ sendFileWithContent(const char *filename, const char *content,
 		pq_putmessage('d', buf, pad);
 	}
 
-	pg_sha256_update(&sha256_ctx, (uint8 *) content, len);
-	pg_sha256_final(&sha256_ctx, shabuf);
+	if (manifest_with_checksums)
+	{
+		pg_sha256_update(&sha256_ctx, (uint8 *) content, len);
+		pg_sha256_final(&sha256_ctx, shabuf);
+	}
+
 	AddFileToManifest(manifest, NULL, filename, len, statbuf.st_mtime,
 					  shabuf);
 }
@@ -1571,7 +1593,8 @@ sendFile(const char *readfilename, const char *tarfilename,
 	pg_sha256_ctx	sha256_ctx;
 	uint8		shabuf[PG_SHA256_DIGEST_LENGTH];
 
-	pg_sha256_init(&sha256_ctx);
+	if (manifest_with_checksums)
+		pg_sha256_init(&sha256_ctx);
 
 	fp = AllocateFile(readfilename, "rb");
 	if (fp == NULL)
@@ -1742,7 +1765,8 @@ sendFile(const char *readfilename, const char *tarfilename,
 					(errmsg("base backup could not send data, aborting backup")));
 
 		/* Also feed it to the checksum machinery. */
-		pg_sha256_update(&sha256_ctx, (uint8 *) buf, cnt);
+		if (manifest_with_checksums)
+			pg_sha256_update(&sha256_ctx, (uint8 *) buf, cnt);
 
 		len += cnt;
 		throttle(cnt);
@@ -1768,7 +1792,8 @@ sendFile(const char *readfilename, const char *tarfilename,
 		{
 			cnt = Min(sizeof(buf), statbuf->st_size - len);
 			pq_putmessage('d', buf, cnt);
-			pg_sha256_update(&sha256_ctx, (uint8 *) buf, cnt);
+			if (manifest_with_checksums)
+				pg_sha256_update(&sha256_ctx, (uint8 *) buf, cnt);
 			len += cnt;
 			throttle(cnt);
 		}
@@ -1801,7 +1826,8 @@ sendFile(const char *readfilename, const char *tarfilename,
 
 	total_checksum_failures += checksum_failures;
 
-	pg_sha256_final(&sha256_ctx, shabuf);
+	if (manifest_with_checksums)
+		pg_sha256_final(&sha256_ctx, shabuf);
 	AddFileToManifest(manifest, tsoid, tarfilename, statbuf->st_size,
 					  statbuf->st_mtime, shabuf);
 
diff --git a/src/backend/replication/repl_gram.y b/src/backend/replication/repl_gram.y
index c4e11cc..542a3f7 100644
--- a/src/backend/replication/repl_gram.y
+++ b/src/backend/replication/repl_gram.y
@@ -87,6 +87,7 @@ static SQLCmd *make_sqlcmd(void);
 %token K_EXPORT_SNAPSHOT
 %token K_NOEXPORT_SNAPSHOT
 %token K_USE_SNAPSHOT
+%token K_MANIFEST_WITH_CHECKSUMS
 
 %type <node>	command
 %type <node>	base_backup start_replication start_logical_replication
@@ -214,6 +215,11 @@ base_backup_opt:
 				  $$ = makeDefElem("noverify_checksums",
 								   (Node *)makeInteger(true), -1);
 				}
+			| K_MANIFEST_WITH_CHECKSUMS
+				{
+				  $$ = makeDefElem("manifest_with_checksums",
+								   (Node *)makeInteger(true), -1);
+				}
 			;
 
 create_replication_slot:
diff --git a/src/backend/replication/repl_scanner.l b/src/backend/replication/repl_scanner.l
index 380faeb..4f92bc1 100644
--- a/src/backend/replication/repl_scanner.l
+++ b/src/backend/replication/repl_scanner.l
@@ -107,6 +107,7 @@ EXPORT_SNAPSHOT		{ return K_EXPORT_SNAPSHOT; }
 NOEXPORT_SNAPSHOT	{ return K_NOEXPORT_SNAPSHOT; }
 USE_SNAPSHOT		{ return K_USE_SNAPSHOT; }
 WAIT				{ return K_WAIT; }
+MANIFEST_WITH_CHECKSUMS { return K_MANIFEST_WITH_CHECKSUMS; }
 
 ","				{ return ','; }
 ";"				{ return ';'; }
diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c
index 508e4a6..f4f8ffe 100644
--- a/src/bin/pg_basebackup/pg_basebackup.c
+++ b/src/bin/pg_basebackup/pg_basebackup.c
@@ -141,6 +141,7 @@ static bool temp_replication_slot = true;
 static bool create_slot = false;
 static bool no_slot = false;
 static bool verify_checksums = true;
+static bool manifest_with_checksums = false;
 
 static bool success = false;
 static bool made_new_pgdata = false;
@@ -398,6 +399,8 @@ usage(void)
 	printf(_("      --no-slot          prevent creation of temporary replication slot\n"));
 	printf(_("      --no-verify-checksums\n"
 			 "                         do not verify checksums\n"));
+	printf(_("      --manifest-with-checksums\n"
+			 "                         do calculate checksums for manifest files\n"));
 	printf(_("  -?, --help             show this help, then exit\n"));
 	printf(_("\nConnection options:\n"));
 	printf(_("  -d, --dbname=CONNSTR   connection string\n"));
@@ -1821,7 +1824,7 @@ BaseBackup(void)
 	}
 
 	basebkp =
-		psprintf("BASE_BACKUP LABEL '%s' %s %s %s %s %s %s %s",
+		psprintf("BASE_BACKUP LABEL '%s' %s %s %s %s %s %s %s %s",
 				 escaped_label,
 				 showprogress ? "PROGRESS" : "",
 				 includewal == FETCH_WAL ? "WAL" : "",
@@ -1829,7 +1832,8 @@ BaseBackup(void)
 				 includewal == NO_WAL ? "" : "NOWAIT",
 				 maxrate_clause ? maxrate_clause : "",
 				 format == 't' ? "TABLESPACE_MAP" : "",
-				 verify_checksums ? "" : "NOVERIFY_CHECKSUMS");
+				 verify_checksums ? "" : "NOVERIFY_CHECKSUMS",
+				 manifest_with_checksums ? "MANIFEST_WITH_CHECKSUMS" : "");
 
 	if (PQsendQuery(conn, basebkp) == 0)
 	{
@@ -2162,6 +2166,7 @@ main(int argc, char **argv)
 		{"waldir", required_argument, NULL, 1},
 		{"no-slot", no_argument, NULL, 2},
 		{"no-verify-checksums", no_argument, NULL, 3},
+		{"manifest-with-checksums", no_argument, NULL, 4},
 		{NULL, 0, NULL, 0}
 	};
 	int			c;
@@ -2330,6 +2335,9 @@ main(int argc, char **argv)
 			case 3:
 				verify_checksums = false;
 				break;
+			case 4:
+				manifest_with_checksums = true;
+				break;
 			default:
 
 				/*
-- 
1.8.3.1

From d7f34e6863b4d533d0fc4ada8f020308dfae107d Mon Sep 17 00:00:00 2001
From: Rushabh Lathia <rushabh.lat...@enterprisedb.com>
Date: Sat, 16 Nov 2019 11:57:04 +0530
Subject: [PATCH] Fix warnings.

---
 src/backend/replication/basebackup.c  | 7 +++++--
 src/bin/pg_basebackup/pg_basebackup.c | 2 +-
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c
index 9812f2a..273f837 100644
--- a/src/backend/replication/basebackup.c
+++ b/src/backend/replication/basebackup.c
@@ -902,6 +902,7 @@ AddFileToManifest(StringInfo manifest, const char *tsoid,
 	static char timebuf[128];
 	static char shatextbuf[PG_SHA256_DIGEST_LENGTH * 2 + 1];
 	int		shatextlen;
+	struct pg_tm *tm;
 
 	/*
 	 * If this file is part of a tablespace, the filename passed to this
@@ -923,8 +924,10 @@ AddFileToManifest(StringInfo manifest, const char *tsoid,
 	 * and since time zone definitions can change, possibly causing confusion,
 	 * use GMT always.
 	 */
-	pg_strftime(timebuf, sizeof(timebuf), "%Y-%m-%d %H:%M:%S %Z",
-				pg_gmtime(&mtime));
+	tm = pg_gmtime(&mtime);
+	if (tm == NULL)
+		elog(ERROR, "could not convert epoch to timestamp: %m");
+	pg_strftime(timebuf, sizeof(timebuf), "%Y-%m-%d %H:%M:%S %Z", tm);
 
 	/* Convert checksum to hexadecimal. */
 	shatextlen =
diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c
index c56246f..508e4a6 100644
--- a/src/bin/pg_basebackup/pg_basebackup.c
+++ b/src/bin/pg_basebackup/pg_basebackup.c
@@ -1194,7 +1194,7 @@ ReceiveTarFile(PGconn *conn, PGresult *res, int rownum)
 
 		initPQExpBuffer(&buf);
 		ReceiveBackupManifestInMemory(conn, &buf);
-		if (PQExpBufferBroken(&buf))
+		if (PQExpBufferDataBroken(buf))
 		{
 			pg_log_error("out of memory");
 			exit(1);
-- 
1.8.3.1

From 5c3baefd9c9caca252f949528667d2427b037579 Mon Sep 17 00:00:00 2001
From: Rushabh Lathia <rushabh.lat...@enterprisedb.com>
Date: Wed, 13 Nov 2019 15:25:54 +0530
Subject: [PATCH] POC of backup manifest with file names, sizes, timestamps,
 checksums.

---
 src/backend/access/transam/xlog.c     |   3 +-
 src/backend/replication/basebackup.c  | 238 ++++++++++++++++++++++++++++++----
 src/bin/pg_basebackup/pg_basebackup.c | 116 ++++++++++++++++-
 src/include/replication/basebackup.h  |   4 +-
 4 files changed, 334 insertions(+), 27 deletions(-)

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 3b766e6..b7f1fe5 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -10495,7 +10495,8 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
 			ti->oid = pstrdup(de->d_name);
 			ti->path = pstrdup(buflinkpath.data);
 			ti->rpath = relpath ? pstrdup(relpath) : NULL;
-			ti->size = infotbssize ? sendTablespace(fullpath, true) : -1;
+			ti->size = infotbssize ?
+				sendTablespace(fullpath, ti->oid, true, NULL) : -1;
 
 			if (tablespaces)
 				*tablespaces = lappend(*tablespaces, ti);
diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c
index 1fa4551..9812f2a 100644
--- a/src/backend/replication/basebackup.c
+++ b/src/backend/replication/basebackup.c
@@ -19,6 +19,7 @@
 #include "access/xlog_internal.h"	/* for pg_start/stop_backup */
 #include "catalog/pg_type.h"
 #include "common/file_perm.h"
+#include "common/sha2.h"
 #include "lib/stringinfo.h"
 #include "libpq/libpq.h"
 #include "libpq/pqformat.h"
@@ -55,16 +56,25 @@ typedef struct
 
 
 static int64 sendDir(const char *path, int basepathlen, bool sizeonly,
-					 List *tablespaces, bool sendtblspclinks);
+					 List *tablespaces, bool sendtblspclinks,
+					 StringInfo manifest, const char *tsoid);
 static bool sendFile(const char *readfilename, const char *tarfilename,
-					 struct stat *statbuf, bool missing_ok, Oid dboid);
-static void sendFileWithContent(const char *filename, const char *content);
+					 struct stat *statbuf, bool missing_ok, Oid dboid,
+					 StringInfo manifest, const char *tsoid);
+static void sendFileWithContent(const char *filename, const char *content,
+								StringInfo manifest);
 static int64 _tarWriteHeader(const char *filename, const char *linktarget,
 							 struct stat *statbuf, bool sizeonly);
 static int64 _tarWriteDir(const char *pathbuf, int basepathlen, struct stat *statbuf,
 						  bool sizeonly);
 static void send_int8_string(StringInfoData *buf, int64 intval);
 static void SendBackupHeader(List *tablespaces);
+static void InitializeManifest(StringInfo manifest);
+static void AddFileToManifest(StringInfo manifest, const char *tsoid,
+							  const char *filename, size_t size, time_t mtime,
+							  uint8 *shabuf);
+static void SendBackupManifest(StringInfo manifest);
+static char *escape_field_for_manifest(const char *s);
 static void base_backup_cleanup(int code, Datum arg);
 static void perform_base_backup(basebackup_options *opt);
 static void parse_basebackup_options(List *options, basebackup_options *opt);
@@ -241,6 +251,7 @@ perform_base_backup(basebackup_options *opt)
 	TimeLineID	endtli;
 	StringInfo	labelfile;
 	StringInfo	tblspc_map_file = NULL;
+	StringInfo	manifest;
 	int			datadirpathlen;
 	List	   *tablespaces = NIL;
 
@@ -250,6 +261,8 @@ perform_base_backup(basebackup_options *opt)
 
 	labelfile = makeStringInfo();
 	tblspc_map_file = makeStringInfo();
+	manifest = makeStringInfo();
+	InitializeManifest(manifest);
 
 	total_checksum_failures = 0;
 
@@ -286,7 +299,10 @@ perform_base_backup(basebackup_options *opt)
 
 		/* Add a node for the base directory at the end */
 		ti = palloc0(sizeof(tablespaceinfo));
-		ti->size = opt->progress ? sendDir(".", 1, true, tablespaces, true) : -1;
+		if (opt->progress)
+			ti->size = sendDir(".", 1, true, tablespaces, true, NULL, NULL);
+		else
+			ti->size = -1;
 		tablespaces = lappend(tablespaces, ti);
 
 		/* Send tablespace header */
@@ -333,7 +349,8 @@ perform_base_backup(basebackup_options *opt)
 				struct stat statbuf;
 
 				/* In the main tar, include the backup_label first... */
-				sendFileWithContent(BACKUP_LABEL_FILE, labelfile->data);
+				sendFileWithContent(BACKUP_LABEL_FILE, labelfile->data,
+									manifest);
 
 				/*
 				 * Send tablespace_map file if required and then the bulk of
@@ -341,11 +358,12 @@ perform_base_backup(basebackup_options *opt)
 				 */
 				if (tblspc_map_file && opt->sendtblspcmapfile)
 				{
-					sendFileWithContent(TABLESPACE_MAP, tblspc_map_file->data);
-					sendDir(".", 1, false, tablespaces, false);
+					sendFileWithContent(TABLESPACE_MAP, tblspc_map_file->data,
+										manifest);
+					sendDir(".", 1, false, tablespaces, false, manifest, NULL);
 				}
 				else
-					sendDir(".", 1, false, tablespaces, true);
+					sendDir(".", 1, false, tablespaces, true, manifest, NULL);
 
 				/* ... and pg_control after everything else. */
 				if (lstat(XLOG_CONTROL_FILE, &statbuf) != 0)
@@ -353,10 +371,11 @@ perform_base_backup(basebackup_options *opt)
 							(errcode_for_file_access(),
 							 errmsg("could not stat file \"%s\": %m",
 									XLOG_CONTROL_FILE)));
-				sendFile(XLOG_CONTROL_FILE, XLOG_CONTROL_FILE, &statbuf, false, InvalidOid);
+				sendFile(XLOG_CONTROL_FILE, XLOG_CONTROL_FILE, &statbuf,
+						 false, InvalidOid, manifest, NULL);
 			}
 			else
-				sendTablespace(ti->path, false);
+				sendTablespace(ti->path, ti->oid, false, manifest);
 
 			/*
 			 * If we're including WAL, and this is the main data directory we
@@ -575,7 +594,7 @@ perform_base_backup(basebackup_options *opt)
 			 * complete segment.
 			 */
 			StatusFilePath(pathbuf, walFileName, ".done");
-			sendFileWithContent(pathbuf, "");
+			sendFileWithContent(pathbuf, "", manifest);
 		}
 
 		/*
@@ -598,16 +617,20 @@ perform_base_backup(basebackup_options *opt)
 						(errcode_for_file_access(),
 						 errmsg("could not stat file \"%s\": %m", pathbuf)));
 
-			sendFile(pathbuf, pathbuf, &statbuf, false, InvalidOid);
+			sendFile(pathbuf, pathbuf, &statbuf, false, InvalidOid, manifest,
+					 NULL);
 
 			/* unconditionally mark file as archived */
 			StatusFilePath(pathbuf, fname, ".done");
-			sendFileWithContent(pathbuf, "");
+			sendFileWithContent(pathbuf, "", manifest);
 		}
 
 		/* Send CopyDone message for the last tar file */
 		pq_putemptymessage('c');
 	}
+
+	SendBackupManifest(manifest);
+
 	SendXlogRecPtrResult(endptr, endtli);
 
 	if (total_checksum_failures)
@@ -860,6 +883,151 @@ SendBackupHeader(List *tablespaces)
 	pq_puttextmessage('C', "SELECT");
 }
 
+static void
+InitializeManifest(StringInfo manifest)
+{
+	appendStringInfoString(manifest, "PostgreSQL-Backup-Manifest-Version 1\n");
+}
+
+/*
+ * Add an entry to the backup manifest for a file.
+ */
+static void
+AddFileToManifest(StringInfo manifest, const char *tsoid,
+				  const char *filename, size_t size, time_t mtime,
+				  uint8 *shabuf)
+{
+	char	pathbuf[MAXPGPATH];
+	char   *escaped_filename;
+	static char timebuf[128];
+	static char shatextbuf[PG_SHA256_DIGEST_LENGTH * 2 + 1];
+	int		shatextlen;
+
+	/*
+	 * If this file is part of a tablespace, the filename passed to this
+	 * function will be relative to the tar file that contains it. We want
+	 * the pathname relative to the data directory (ignoring the intermediate
+	 * symlink traversal).
+	 */
+	if (tsoid != NULL)
+	{
+		snprintf(pathbuf, sizeof(pathbuf), "pg_tblspc/%s/%s", tsoid, filename);
+		filename = pathbuf;
+	}
+
+	/* Escape filename, if necessary. */
+	escaped_filename = escape_field_for_manifest(filename);
+
+	/*
+	 * Convert time to a string. Since it's not clear what time zone to use
+	 * and since time zone definitions can change, possibly causing confusion,
+	 * use GMT always.
+	 */
+	pg_strftime(timebuf, sizeof(timebuf), "%Y-%m-%d %H:%M:%S %Z",
+				pg_gmtime(&mtime));
+
+	/* Convert checksum to hexadecimal. */
+	shatextlen =
+		hex_encode((char *) shabuf, PG_SHA256_DIGEST_LENGTH, shatextbuf);
+	Assert(shatextlen + 1 == sizeof(shatextbuf));
+	shatextbuf[shatextlen] = '\0';
+
+	/* Add to manifest. */
+	appendStringInfo(manifest, "File\t%s\t%zu\t%s\t%s\n",
+					 escaped_filename == NULL ? filename : escaped_filename,
+					 size, timebuf, shatextbuf);
+
+	/* Avoid leaking memory. */
+	if (escaped_filename != NULL)
+		pfree(escaped_filename);
+}
+
+/*
+ * Finalize the backup manifest, and send it to the client.
+ */
+static void
+SendBackupManifest(StringInfo manifest)
+{
+	pg_sha256_ctx	sha256_ctx;
+	uint8			shabuf[PG_SHA256_DIGEST_LENGTH];
+	StringInfoData	protobuf;
+	int				shastringlen;
+
+	/* Checksum the manifest. */
+	pg_sha256_init(&sha256_ctx);
+	pg_sha256_update(&sha256_ctx, (uint8 *) manifest->data, manifest->len);
+	pg_sha256_final(&sha256_ctx, shabuf);
+	appendStringInfoString(manifest, "Manifest-Checksum\t");
+	shastringlen = PG_SHA256_DIGEST_LENGTH * 2;
+	enlargeStringInfo(manifest, shastringlen);
+	shastringlen = hex_encode((char *) shabuf, PG_SHA256_DIGEST_LENGTH,
+							  manifest->data + manifest->len);
+	Assert(shastringlen == PG_SHA256_DIGEST_LENGTH * 2);
+	manifest->len += shastringlen;
+	appendStringInfoChar(manifest, '\n');
+
+	/* Send CopyOutResponse message */
+	pq_beginmessage(&protobuf, 'H');
+	pq_sendbyte(&protobuf, 0);	/* overall format */
+	pq_sendint16(&protobuf, 0);	/* natts */
+	pq_endmessage(&protobuf);
+
+	/* Send CopyData message */
+	pq_putmessage('d', manifest->data, manifest->len);
+
+	/* And finally CopyDone message */
+	pq_putemptymessage('c');
+}
+
+/*
+ * Escape a field for inclusion in a manifest.
+ *
+ * We use the following escaping rule: If a field contains \t, \r, or \n,
+ * the field must be surrounded by double-quotes, and any internal double
+ * quotes must be doubled. Otherwise, no escaping is required.
+ *
+ * The return value is a new palloc'd string with escaping added, or NULL
+ * if no escaping is required.
+ */
+static char *
+escape_field_for_manifest(const char *s)
+{
+	bool	escaping_required = false;
+	int		escaped_length = 2;
+	const char   *t;
+	char   *result;
+	char   *r;
+
+	for (t = s; *t != '\0'; ++t)
+	{
+		if (*t == '\t' || *t == '\r' || *t == '\n')
+			escaping_required = true;
+		if (*t == '"')
+			++escaped_length;
+		++escaped_length;
+	}
+
+	if (!escaping_required)
+		return NULL;
+
+	result = palloc(escaped_length + 1);
+	result[0] = '"';
+	result[escaped_length - 1] = '"';
+	result[escaped_length] = '\0';
+	r = result + 1;
+
+	for (t = s; *t != '\0'; ++t)
+	{
+		*(r++) = *t;
+		if (*t == '"')
+			*(r++) = *t;
+	}
+
+	Assert(r == &result[escaped_length - 1]);
+
+	return result;
+}
+
 /*
  * Send a single resultset containing just a single
  * XLogRecPtr record (in text format)
@@ -920,11 +1088,16 @@ SendXlogRecPtrResult(XLogRecPtr ptr, TimeLineID tli)
  * Inject a file with given name and content in the output tar stream.
  */
 static void
-sendFileWithContent(const char *filename, const char *content)
+sendFileWithContent(const char *filename, const char *content,
+					StringInfo manifest)
 {
 	struct stat statbuf;
 	int			pad,
 				len;
+	pg_sha256_ctx	sha256_ctx;
+	uint8		shabuf[PG_SHA256_DIGEST_LENGTH];
+
+	pg_sha256_init(&sha256_ctx);
 
 	len = strlen(content);
 
@@ -957,6 +1130,11 @@ sendFileWithContent(const char *filename, const char *content)
 		MemSet(buf, 0, pad);
 		pq_putmessage('d', buf, pad);
 	}
+
+	pg_sha256_update(&sha256_ctx, (uint8 *) content, len);
+	pg_sha256_final(&sha256_ctx, shabuf);
+	AddFileToManifest(manifest, NULL, filename, len, statbuf.st_mtime,
+					  shabuf);
 }
 
 /*
@@ -967,7 +1145,7 @@ sendFileWithContent(const char *filename, const char *content)
  * Only used to send auxiliary tablespaces, not PGDATA.
  */
 int64
-sendTablespace(char *path, bool sizeonly)
+sendTablespace(char *path, char *oid, bool sizeonly, StringInfo manifest)
 {
 	int64		size;
 	char		pathbuf[MAXPGPATH];
@@ -1000,7 +1178,7 @@ sendTablespace(char *path, bool sizeonly)
 						   sizeonly);
 
 	/* Send all the files in the tablespace version directory */
-	size += sendDir(pathbuf, strlen(path), sizeonly, NIL, true);
+	size += sendDir(pathbuf, strlen(path), sizeonly, NIL, true, manifest, oid);
 
 	return size;
 }
@@ -1019,7 +1197,7 @@ sendTablespace(char *path, bool sizeonly)
  */
 static int64
 sendDir(const char *path, int basepathlen, bool sizeonly, List *tablespaces,
-		bool sendtblspclinks)
+		bool sendtblspclinks, StringInfo manifest, const char *tsoid)
 {
 	DIR		   *dir;
 	struct dirent *de;
@@ -1295,7 +1473,8 @@ sendDir(const char *path, int basepathlen, bool sizeonly, List *tablespaces,
 				skip_this_dir = true;
 
 			if (!skip_this_dir)
-				size += sendDir(pathbuf, basepathlen, sizeonly, tablespaces, sendtblspclinks);
+				size += sendDir(pathbuf, basepathlen, sizeonly, tablespaces,
+								sendtblspclinks, manifest, tsoid);
 		}
 		else if (S_ISREG(statbuf.st_mode))
 		{
@@ -1303,7 +1482,8 @@ sendDir(const char *path, int basepathlen, bool sizeonly, List *tablespaces,
 
 			if (!sizeonly)
 				sent = sendFile(pathbuf, pathbuf + basepathlen + 1, &statbuf,
-								true, isDbDir ? pg_atoi(lastDir + 1, sizeof(Oid), 0) : InvalidOid);
+								true, isDbDir ? pg_atoi(lastDir + 1, sizeof(Oid), 0) : InvalidOid,
+								manifest, tsoid);
 
 			if (sent || sizeonly)
 			{
@@ -1366,8 +1546,9 @@ is_checksummed_file(const char *fullpath, const char *filename)
  * and the file did not exist.
  */
 static bool
-sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf,
-		 bool missing_ok, Oid dboid)
+sendFile(const char *readfilename, const char *tarfilename,
+		 struct stat *statbuf, bool missing_ok, Oid dboid,
+		 StringInfo manifest, const char *tsoid)
 {
 	FILE	   *fp;
 	BlockNumber blkno = 0;
@@ -1384,6 +1565,10 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf
 	int			segmentno = 0;
 	char	   *segmentpath;
 	bool		verify_checksum = false;
+	pg_sha256_ctx	sha256_ctx;
+	uint8		shabuf[PG_SHA256_DIGEST_LENGTH];
+
+	pg_sha256_init(&sha256_ctx);
 
 	fp = AllocateFile(readfilename, "rb");
 	if (fp == NULL)
@@ -1553,6 +1738,9 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf
 			ereport(ERROR,
 					(errmsg("base backup could not send data, aborting backup")));
 
+		/* Also feed it to the checksum machinery. */
+		pg_sha256_update(&sha256_ctx, (uint8 *) buf, cnt);
+
 		len += cnt;
 		throttle(cnt);
 
@@ -1577,6 +1765,7 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf
 		{
 			cnt = Min(sizeof(buf), statbuf->st_size - len);
 			pq_putmessage('d', buf, cnt);
+			pg_sha256_update(&sha256_ctx, (uint8 *) buf, cnt);
 			len += cnt;
 			throttle(cnt);
 		}
@@ -1584,7 +1773,8 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf
 
 	/*
 	 * Pad to 512 byte boundary, per tar format requirements. (This small
-	 * piece of data is probably not worth throttling.)
+	 * piece of data is probably not worth throttling, and is not checksummed
+	 * because it's not actually part of the file.)
 	 */
 	pad = ((len + 511) & ~511) - len;
 	if (pad > 0)
@@ -1608,6 +1798,10 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf
 
 	total_checksum_failures += checksum_failures;
 
+	pg_sha256_final(&sha256_ctx, shabuf);
+	AddFileToManifest(manifest, tsoid, tarfilename, statbuf->st_size,
+					  statbuf->st_mtime, shabuf);
+
 	return true;
 }
 
diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c
index 0565212..c56246f 100644
--- a/src/bin/pg_basebackup/pg_basebackup.c
+++ b/src/bin/pg_basebackup/pg_basebackup.c
@@ -88,6 +88,12 @@ typedef struct UnpackTarState
 	FILE	   *file;
 }			UnpackTarState;
 
+typedef struct WriteManifestState
+{
+	char		filename[MAXPGPATH];
+	FILE	   *file;
+}			WriteManifestState;
+
 typedef void (*WriteDataCallback) (size_t nbytes, char *buf,
 								   void *callback_data);
 
@@ -180,6 +186,12 @@ static void ReceiveTarCopyChunk(size_t r, char *copybuf, void *callback_data);
 static void ReceiveAndUnpackTarFile(PGconn *conn, PGresult *res, int rownum);
 static void ReceiveTarAndUnpackCopyChunk(size_t r, char *copybuf,
 										 void *callback_data);
+static void ReceiveBackupManifest(PGconn *conn);
+static void ReceiveBackupManifestChunk(size_t r, char *copybuf,
+									   void *callback_data);
+static void ReceiveBackupManifestInMemory(PGconn *conn, PQExpBuffer buf);
+static void ReceiveBackupManifestInMemoryChunk(size_t r, char *copybuf,
+											   void *callback_data);
 static void BaseBackup(void);
 
 static bool reached_end_position(XLogRecPtr segendpos, uint32 timeline,
@@ -924,8 +936,8 @@ ReceiveCopyData(PGconn *conn, WriteDataCallback callback,
 	res = PQgetResult(conn);
 	if (PQresultStatus(res) != PGRES_COPY_OUT)
 	{
-		pg_log_error("could not get COPY data stream: %s",
-					 PQerrorMessage(conn));
+		pg_log_error("could not get COPY data stream: %s [%s]",
+					 PQerrorMessage(conn), PQresStatus(PQresultStatus(res)));
 		exit(1);
 	}
 	PQclear(res);
@@ -1170,6 +1182,31 @@ ReceiveTarFile(PGconn *conn, PGresult *res, int rownum)
 		}
 	}
 
+	/*
+	 * Normally, we emit the backup manifest as a separate file, but when
+	 * we're writing a tarfile to stdout, we don't have that option, so
+	 * include it in the one tarfile we've got.
+	 */
+	if (strcmp(basedir, "-") == 0)
+	{
+		char		header[512];
+		PQExpBufferData	buf;
+
+		initPQExpBuffer(&buf);
+		ReceiveBackupManifestInMemory(conn, &buf);
+		if (PQExpBufferBroken(&buf))
+		{
+			pg_log_error("out of memory");
+			exit(1);
+		}
+		tarCreateHeader(header, "backup_manifest", NULL, buf.len,
+						pg_file_create_mode, 04000, 02000,
+						time(NULL));
+		writeTarData(&state, header, sizeof(header));
+		writeTarData(&state, buf.data, buf.len);
+		termPQExpBuffer(&buf);
+	}
+
 	/* 2 * 512 bytes empty data at end of file */
 	writeTarData(&state, zerobuf, sizeof(zerobuf));
 
@@ -1417,6 +1454,64 @@ get_tablespace_mapping(const char *dir)
 
 
 /*
+ * Receive the backup manifest file and write it out to a file.
+ */
+static void
+ReceiveBackupManifest(PGconn *conn)
+{
+	WriteManifestState state;
+
+	snprintf(state.filename, sizeof(state.filename),
+			 "%s/backup_manifest", basedir);
+	state.file = fopen(state.filename, "wb");
+	if (state.file == NULL)
+	{
+		pg_log_error("could not create file \"%s\": %m", state.filename);
+		exit(1);
+	}
+
+	ReceiveCopyData(conn, ReceiveBackupManifestChunk, &state);
+
+	fclose(state.file);
+}
+
+/*
+ * Receive one chunk of the backup manifest file and write it out to a file.
+ */
+static void
+ReceiveBackupManifestChunk(size_t r, char *copybuf, void *callback_data)
+{
+	WriteManifestState *state = callback_data;
+
+	if (fwrite(copybuf, r, 1, state->file) != 1)
+	{
+		pg_log_error("could not write to file \"%s\": %m", state->filename);
+		exit(1);
+	}
+}
+
+/*
+ * Receive the backup manifest file and store it in the given buffer.
+ */
+static void
+ReceiveBackupManifestInMemory(PGconn *conn, PQExpBuffer buf)
+{
+	ReceiveCopyData(conn, ReceiveBackupManifestInMemoryChunk, buf);
+}
+
+/*
+ * Receive one chunk of the backup manifest file and append it to the given buffer.
+ */
+static void
+ReceiveBackupManifestInMemoryChunk(size_t r, char *copybuf,
+								   void *callback_data)
+{
+	PQExpBuffer buf = callback_data;
+
+	appendBinaryPQExpBuffer(buf, copybuf, r);
+}
+
+/*
  * Receive a tar format stream from the connection to the server, and unpack
  * the contents of it into a directory. Only files, directories and
  * symlinks are supported, no other kinds of special files.
@@ -1658,6 +1753,7 @@ BaseBackup(void)
 				maxServerMajor;
 	int			serverVersion,
 				serverMajor;
+	bool		writing_to_stdout;
 
 	Assert(conn != NULL);
 
@@ -1821,7 +1917,8 @@ BaseBackup(void)
 	/*
 	 * When writing to stdout, require a single tablespace
 	 */
-	if (format == 't' && strcmp(basedir, "-") == 0 && PQntuples(res) > 1)
+	writing_to_stdout = format == 't' && strcmp(basedir, "-") == 0;
+	if (writing_to_stdout && PQntuples(res) > 1)
 	{
 		pg_log_error("can only write single tablespace to stdout, database has %d",
 					 PQntuples(res));
@@ -1850,6 +1947,19 @@ BaseBackup(void)
 			ReceiveAndUnpackTarFile(conn, res, i);
 	}							/* Loop over all tablespaces */
 
+	/*
+	 * Now receive backup manifest, if appropriate.
+	 *
+	 * If we're writing a tarfile to stdout, ReceiveTarFile will have already
+	 * processed the backup manifest and included it in the output tarfile.
+	 * Such a configuration doesn't allow for writing multiple files.
+	 *
+	 * If we're talking to an older server, it won't send a backup manifest,
+	 * so don't try to receive one.
+	 */
+	if (!writing_to_stdout && serverMajor >= 1300)
+		ReceiveBackupManifest(conn);
+
 	if (showprogress)
 	{
 		progress_report(PQntuples(res), NULL, true);
diff --git a/src/include/replication/basebackup.h b/src/include/replication/basebackup.h
index 503a5b9..8fe0136 100644
--- a/src/include/replication/basebackup.h
+++ b/src/include/replication/basebackup.h
@@ -12,6 +12,7 @@
 #ifndef _BASEBACKUP_H
 #define _BASEBACKUP_H
 
+#include "lib/stringinfo.h"
 #include "nodes/replnodes.h"
 
 /*
@@ -31,6 +32,7 @@ typedef struct
 
 extern void SendBaseBackup(BaseBackupCmd *cmd);
 
-extern int64 sendTablespace(char *path, bool sizeonly);
+extern int64 sendTablespace(char *path, char *oid, bool sizeonly,
+							StringInfo manifest);
 
 #endif							/* _BASEBACKUP_H */
-- 
1.8.3.1
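
For readers skimming the patches, the manifest the server streams back is
plain text: a version header, one tab-separated "File" line per file (path,
size in bytes, last modification time in GMT, SHA-256 of the contents in
hex), and a final "Manifest-Checksum" line whose SHA-256 covers everything
that precedes it. Paths containing a tab, carriage return or newline are
double-quoted per escape_field_for_manifest(). The entries below are made up
purely to illustrate the shape of the file:

PostgreSQL-Backup-Manifest-Version 1
File	backup_label	226	2019-11-13 09:49:22 GMT	<sha256 of file contents, in hex>
File	global/pg_control	8192	2019-11-13 09:49:22 GMT	<sha256 of file contents, in hex>
Manifest-Checksum	<sha256 of everything above, in hex>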

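Not part of the patch set, but since the trailer is simply a SHA-256 over
everything that precedes the "Manifest-Checksum" line, it can be
cross-checked client-side with a few lines of C. The following is only a
rough sketch under that assumption: it reuses the same src/common SHA-256
routines the patches already rely on, assumes the whole manifest has been
read into memory, and manifest_checksum_ok() is an illustrative name rather
than anything that exists in the patches.

/*
 * Rough sketch: verify the Manifest-Checksum trailer of a backup_manifest
 * that has been read into memory as (data, len).
 */
#include "postgres_fe.h"

#include <string.h>

#include "common/sha2.h"

bool
manifest_checksum_ok(const char *data, size_t len)
{
	static const char hexdigits[] = "0123456789abcdef";
	const char *trailer = NULL;
	const char *p;
	pg_sha256_ctx ctx;
	uint8		digest[PG_SHA256_DIGEST_LENGTH];
	char		hex[PG_SHA256_DIGEST_LENGTH * 2];
	int			i;

	/* Find the start of the last "Manifest-Checksum" line. */
	for (p = data; p + 18 <= data + len; ++p)
	{
		if ((p == data || p[-1] == '\n') &&
			memcmp(p, "Manifest-Checksum\t", 18) == 0)
			trailer = p;
	}
	if (trailer == NULL ||
		trailer + 18 + PG_SHA256_DIGEST_LENGTH * 2 > data + len)
		return false;

	/* The server checksums everything that precedes the trailer line. */
	pg_sha256_init(&ctx);
	pg_sha256_update(&ctx, (const uint8 *) data, trailer - data);
	pg_sha256_final(&ctx, digest);

	/* hex_encode() is backend-only, so spell the conversion out here. */
	for (i = 0; i < PG_SHA256_DIGEST_LENGTH; ++i)
	{
		hex[i * 2] = hexdigits[(digest[i] >> 4) & 0xF];
		hex[i * 2 + 1] = hexdigits[digest[i] & 0xF];
	}

	return memcmp(trailer + 18, hex, PG_SHA256_DIGEST_LENGTH * 2) == 0;
}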