From 8ddfaea4530d0b2c73b2f826558716330167299b Mon Sep 17 00:00:00 2001
From: Mark Dilger <mark.dilger@enterprisedb.com>
Date: Thu, 22 Apr 2021 12:06:18 -0700
Subject: [PATCH v22] amcheck: adding toast pointer corruption checks

Adding additional checks of toast pointers: checking the extsize
against the rawsize, the uncompressed size against the size limit
for varlena datums, the va_toastrelid field against the heap table's
reltoastrelid, and if compressed, the validity of the compression
method ID.

Adding checks that the toasted attribute chunks are returned by the
toast index scan in order and without duplicates.  Checking that the
chunks do not contain null entries and that the chunks belong to the
right toasted attribute.  Improving the reports of missing or extra
chunks to be more clear to the user.

Changing the logic to continue checking toast even after reporting
that HEAP_HASEXTERNAL is false.  Previously, the toast checking
stopped here, but that wasn't necessary, and subsequent checks may
provide additional useful diagnostic information.
---
 contrib/amcheck/verify_heapam.c  | 647 ++++++++++++++++++++++++++-----
 src/tools/pgindent/typedefs.list |   1 +
 2 files changed, 558 insertions(+), 90 deletions(-)

diff --git a/contrib/amcheck/verify_heapam.c b/contrib/amcheck/verify_heapam.c
index 9f159eb3db..00cd43353b 100644
--- a/contrib/amcheck/verify_heapam.c
+++ b/contrib/amcheck/verify_heapam.c
@@ -30,6 +30,9 @@ PG_FUNCTION_INFO_V1(verify_heapam);
 /* The number of columns in tuples returned by verify_heapam */
 #define HEAPCHECK_RELATION_COLS 4
 
+/* The largest valid toast va_rawsize (0x3FFFFFFF is 2^30 - 1) */
+#define VARLENA_SIZE_LIMIT 0x3FFFFFFF
+
 /*
  * Despite the name, we use this for reporting problems with both XIDs and
  * MXIDs.
@@ -146,12 +149,64 @@ typedef struct HeapCheckContext
 	Tuplestorestate *tupstore;
 } HeapCheckContext;
 
+/*
+ * Struct holding the running context information during the check of a single
+ * toasted attribute.
+ */
+typedef struct ToastCheckContext
+{
+	/*
+	 * Cache tracking a sequence of contiguous toast chunks, each of size
+	 * 'extraneous_size', and having sequence numbers outside the expected
+	 * range.  The sequence numbers of such chunks are cached until the
+	 * sequence ends, or a chunk of a different size is encountered, so that
+	 * a single toast corruption report can be emitted for the whole group.
+	 *
+	 * Note that if another type of corruption occurs mid sequence, the
+	 * extraneous sequence of chunks thus far encountered will be reported
+	 * first, then the other corruption.  This means that a single, contiguous
+	 * sequence of extraneous chunks will not always be reported as such, but
+	 * instead be reported as multiple subsequences interrupted by other
+	 * corruption reports.
+	 *
+	 * Note that we do not need a cache tracking missing chunks, because we
+	 * immediately know that contiguous chunks are missing when we see the
+	 * first chunk that means they have been skipped.  The reporting of
+	 * sequences of extraneous chunks and that of sequences of missing chunks
+	 * is nearly identical, but the manner in which we calculate them differs.
+	 */
+	bool		have_extraneous_chunks;	/* is the cache non-empty? */
+	bool		have_extraneous_size;	/* is 'extraneous_size' valid? */
+	int32		first_extraneous;	/* first chunk_seq of the cached run */
+	int32		last_extraneous;	/* last chunk_seq of the cached run */
+	uint32		extraneous_size;	/* size shared by the cached chunks */
+
+	/*
+	 * How many chunks have been seen so far, including expected, extraneous,
+	 * and corrupt chunks.
+	 */
+	int32		total_chunks;
+
+	/*
+	 * Whether we have seen a chunk with a non-NULL chunk_seq value, and that
+	 * value if so.  Neither value gets updated for chunks with NULL chunk_seq.
+	 */
+	bool		chunk_seq_seen;
+	int32		last_chunk_seq;
+
+	/*
+	 * Expected sequence number and size of the final chunk expected for this
+	 * toasted attribute.
+	 */
+	int32		final_expected_chunk;
+	uint32		final_expected_size;
+} ToastCheckContext;
+
 /* Internal implementation */
 static void sanity_check_relation(Relation rel);
 static void check_tuple(HeapCheckContext *ctx);
-static void check_toast_tuple(HeapTuple toasttup, HeapCheckContext *ctx,
-							  ToastedAttribute *ta, int32 chunkno,
-							  int32 endchunk);
+static int32 check_toast_tuple(HeapTuple toasttup, ToastedAttribute *ta,
+							   ToastCheckContext *tctx, HeapCheckContext *hctx);
 
 static bool check_tuple_attribute(HeapCheckContext *ctx);
 static void check_toasted_attribute(HeapCheckContext *ctx,
@@ -160,9 +215,14 @@ static void check_toasted_attribute(HeapCheckContext *ctx,
 static bool check_tuple_header(HeapCheckContext *ctx);
 static bool check_tuple_visibility(HeapCheckContext *ctx);
 
+static void report_extraneous_chunks(HeapCheckContext *hctx,
+									 ToastedAttribute *ta,
+									 ToastCheckContext *tctx);
 static void report_corruption(HeapCheckContext *ctx, char *msg);
 static void report_toast_corruption(HeapCheckContext *ctx,
-									ToastedAttribute *ta, char *msg);
+									ToastedAttribute *ta,
+									ToastCheckContext *tctx,
+									char *msg);
 static TupleDesc verify_heapam_tupdesc(void);
 static FullTransactionId FullTransactionIdFromXidAndCtx(TransactionId xid,
 														const HeapCheckContext *ctx);
@@ -603,8 +663,16 @@ report_corruption(HeapCheckContext *ctx, char *msg)
  */
 static void
 report_toast_corruption(HeapCheckContext *ctx, ToastedAttribute *ta,
-						char *msg)
+						ToastCheckContext *tctx, char *msg)
 {
+	/*
+	 * If there are any cached extraneous chunks, report those before this
+	 * next message, otherwise the corruptions will appear out of order.
+	 */
+	if (tctx->have_extraneous_chunks)
+		report_extraneous_chunks(ctx, ta, tctx);
+
+	/* Ok, now report the message we were called to report. */
 	report_corruption_internal(ctx->tupstore, ctx->tupdesc, ta->blkno,
 							   ta->offnum, ta->attnum, msg);
 	ctx->is_corrupt = true;
@@ -1147,100 +1215,406 @@ check_tuple_visibility(HeapCheckContext *ctx)
 	return true;
 }
 
-
 /*
- * Check the current toast tuple against the state tracked in ctx, recording
- * any corruption found in ctx->tupstore.
+ * Issues a toast corruption report for the extraneous chunks held in the tctx
+ * cache, if any, and then clears the cache.  An "extraneous chunk" is one
+ * with a sequence number outside the expected range for the toasted
+ * attribute.
  *
- * This is not equivalent to running verify_heapam on the toast table itself,
- * and is not hardened against corruption of the toast table.  Rather, when
- * validating a toasted attribute in the main table, the sequence of toast
- * tuples that store the toasted value are retrieved and checked in order, with
- * each toast tuple being checked against where we are in the sequence, as well
- * as each toast tuple having its varlena structure sanity checked.
+ * A cached run is reported as one chunk or as a range of chunks, mentioning
+ * either the common chunk size or, if that is unknown, corrupt chunk data.
  *
- * Returns whether the toast tuple passed the corruption checks.
+ * If the cache is already empty, the call is harmless.  It is safe to call
+ * from report_toast_corruption(), as the cache flag is cleared up front.
  */
 static void
-check_toast_tuple(HeapTuple toasttup, HeapCheckContext *ctx,
-				  ToastedAttribute *ta, int32 chunkno, int32 endchunk)
+report_extraneous_chunks(HeapCheckContext *hctx, ToastedAttribute *ta,
+						 ToastCheckContext *tctx)
 {
-	int32		curchunk;
-	Pointer		chunk;
-	bool		isnull;
-	int32		chunksize;
-	int32		expected_size;
+	if (!tctx->have_extraneous_chunks)
+		return;
 
 	/*
-	 * Have a chunk, extract the sequence number and the data
+	 * Clear the flag before calling report_toast_corruption to avoid
+	 * infinite recursion.
 	 */
-	curchunk = DatumGetInt32(fastgetattr(toasttup, 2,
-										 ctx->toast_rel->rd_att, &isnull));
-	if (isnull)
-	{
-		report_toast_corruption(ctx, ta,
-								psprintf("toast value %u has toast chunk with null sequence number",
-										 ta->toast_pointer.va_valueid));
-		return;
-	}
-	chunk = DatumGetPointer(fastgetattr(toasttup, 3,
-										ctx->toast_rel->rd_att, &isnull));
-	if (isnull)
-	{
-		report_toast_corruption(ctx, ta,
-								psprintf("toast value %u chunk %d has null data",
-										 ta->toast_pointer.va_valueid, chunkno));
-		return;
-	}
-	if (!VARATT_IS_EXTENDED(chunk))
-		chunksize = VARSIZE(chunk) - VARHDRSZ;
-	else if (VARATT_IS_SHORT(chunk))
+	tctx->have_extraneous_chunks = false;
+
+	if (tctx->first_extraneous < tctx->last_extraneous &&
+		tctx->have_extraneous_size)
+		report_toast_corruption(hctx, ta, tctx,
+								psprintf("toast value %u has unexpected chunks %d through %d each with size %u",
+									 ta->toast_pointer.va_valueid,
+									 tctx->first_extraneous,
+									 tctx->last_extraneous,
+									 tctx->extraneous_size));
+	else if (tctx->first_extraneous < tctx->last_extraneous)
+		report_toast_corruption(hctx, ta, tctx,
+								psprintf("toast value %u has unexpected chunks %d through %d each with corrupt chunk data",
+									 ta->toast_pointer.va_valueid,
+									 tctx->first_extraneous,
+									 tctx->last_extraneous));
+	else if (tctx->have_extraneous_size)
+		report_toast_corruption(hctx, ta, tctx,
+								psprintf("toast value %u has unexpected chunk %d with size %u",
+									 ta->toast_pointer.va_valueid,
+									 tctx->first_extraneous,
+									 tctx->extraneous_size));
+	else
+		report_toast_corruption(hctx, ta, tctx,
+								psprintf("toast value %u has unexpected chunk %d with corrupt chunk data",
+									 ta->toast_pointer.va_valueid,
+									 tctx->first_extraneous));
+}
+
+/*
+ * Records that toast chunk 'chunk_seq' is extraneous.  'chunksize' is only
+ * meaningful if 'have_chunksize' is true.  After the last call for a given
+ * toasted attribute, call report_extraneous_chunks() to flush the cache.
+ */
+static void
+handle_extraneous_chunk(HeapCheckContext *hctx, ToastedAttribute *ta,
+						ToastCheckContext *tctx, int32 chunk_seq,
+						uint32 chunksize, bool have_chunksize)
+{
+	if (tctx->have_extraneous_chunks)
 	{
+		if (tctx->last_extraneous == chunk_seq - 1 &&
+			tctx->have_extraneous_size == have_chunksize &&
+			(tctx->extraneous_size == chunksize || !have_chunksize))
+		{
+			/*
+			 * This is the next chunk in an ongoing sequence of equally sized
+			 * or corrupted chunks.  Extend it, but do not report it yet.
+			 */
+			tctx->last_extraneous = chunk_seq;
+			return;
+		}
+
 		/*
-		 * could happen due to heap_form_tuple doing its thing
+		 * There is an ongoing sequence, but this chunk is discontiguous with
+		 * it or of a different size or corruption status.  Report the sequence
+		 * and clear the cache so we can start over with this chunk.
 		 */
-		chunksize = VARSIZE_SHORT(chunk) - VARHDRSZ_SHORT;
+		report_extraneous_chunks(hctx, ta, tctx);
 	}
-	else
-	{
-		/* should never happen */
-		uint32		header = ((varattrib_4b *) chunk)->va_4byte.va_header;
 
-		report_toast_corruption(ctx, ta,
-								psprintf("toast value %u chunk %d has invalid varlena header %0x",
-										 ta->toast_pointer.va_valueid,
-										 chunkno, header));
+	/* Start a new sequence, but do not report it yet. */
+	tctx->first_extraneous = chunk_seq;
+	tctx->last_extraneous = chunk_seq;
+	tctx->extraneous_size = chunksize;
+	tctx->have_extraneous_size = have_chunksize;
+	tctx->have_extraneous_chunks = true;
+	return;
+}
+
+/*
+ * Report the full-sized chunks first_missing..last_missing as missing.
+ */
+static void
+report_missing_sequence(HeapCheckContext *hctx, ToastedAttribute *ta,
+						ToastCheckContext *tctx, int32 first_missing,
+						int32 last_missing)
+{
+	char	   *msg = psprintf("toast value %u missing chunks %d through %d with expected size %u",
+							   ta->toast_pointer.va_valueid, first_missing,
+							   last_missing, (unsigned) TOAST_MAX_CHUNK_SIZE);
+
+	report_toast_corruption(hctx, ta, tctx, msg);
+}
+
+/*
+ * Report the single chunk missing_chunk, of size missing_size, as missing.
+ */
+static void
+report_missing_chunk(HeapCheckContext *hctx, ToastedAttribute *ta,
+					 ToastCheckContext *tctx, int32 missing_chunk,
+					 uint32 missing_size)
+{
+	char	   *msg = psprintf("toast value %u missing chunk %d with expected size %u",
+							   ta->toast_pointer.va_valueid, missing_chunk, missing_size);
+
+	report_toast_corruption(hctx, ta, tctx, msg);
+}
+
+/*
+ * Issues toast corruption reports for the missing toast chunks in the
+ * [first_missing..last_missing] range, clamped to [0..final_expected_chunk].
+ * A final chunk expected to be short is reported apart from the full ones.
+ */
+static void
+report_missing_chunks(HeapCheckContext *hctx, ToastedAttribute *ta,
+					  ToastCheckContext *tctx, int32 first_missing,
+					  int32 last_missing)
+{
+	uint32	expected_size;	/* expected size of chunk 'last_missing' */
+
+	/*
+	 * Adjust the range of missing values to not extend beyond
+	 * [0..final_expected_chunk] on either end of the range.
+	 */
+	if (first_missing < 0)
+		first_missing = 0;
+	if (last_missing > tctx->final_expected_chunk)
+		last_missing = tctx->final_expected_chunk;
+
+	/* Check whether any missing chunks remain to complain about. */
+	if (first_missing > last_missing)
 		return;
-	}
+
+	if (last_missing < tctx->final_expected_chunk)
+		expected_size = TOAST_MAX_CHUNK_SIZE;
+	else
+		expected_size = tctx->final_expected_size;
 
 	/*
-	 * Some checks on the data we've found
+	 * Report missing chunks with language matching language used for reporting
+	 * extraneous chunks.  Mention the sizes expected for the missing chunks so
+	 * the user can reconcile that against any extraneous chunk reports.
 	 */
-	if (curchunk != chunkno)
+	if (last_missing > first_missing + 1 &&
+		expected_size < TOAST_MAX_CHUNK_SIZE)
 	{
-		report_toast_corruption(ctx, ta,
-								psprintf("toast value %u chunk %d has sequence number %d, but expected sequence number %d",
-										 ta->toast_pointer.va_valueid,
-										 chunkno, curchunk, chunkno));
-		return;
+		report_missing_sequence(hctx, ta, tctx, first_missing, last_missing -1);
+		report_missing_chunk(hctx, ta, tctx, last_missing, expected_size);
 	}
-	if (chunkno > endchunk)
+	else if (last_missing == first_missing + 1 &&
+			 expected_size < TOAST_MAX_CHUNK_SIZE)
 	{
-		report_toast_corruption(ctx, ta,
-								psprintf("toast value %u chunk %d follows last expected chunk %d",
-										 ta->toast_pointer.va_valueid,
-										 chunkno, endchunk));
-		return;
+		report_missing_chunk(hctx, ta, tctx, first_missing, TOAST_MAX_CHUNK_SIZE);
+		report_missing_chunk(hctx, ta, tctx, last_missing, expected_size);
 	}
+	else if (last_missing > first_missing)
+		report_missing_sequence(hctx, ta, tctx, first_missing, last_missing);
+	else
+		report_missing_chunk(hctx, ta, tctx, last_missing, expected_size);
+}
 
-	expected_size = curchunk < endchunk ? TOAST_MAX_CHUNK_SIZE
-		: VARATT_EXTERNAL_GET_EXTSIZE(ta->toast_pointer) - (endchunk * TOAST_MAX_CHUNK_SIZE);
+/*
+ * Check the current toast tuple, recording any corruption found in
+ * hctx->tupstore and updating the running state kept in tctx.
+ *
+ * This is not equivalent to running verify_heapam on the toast table itself,
+ * and is not hardened against corruption of the toast table.  Rather, when
+ * validating a toasted attribute in the main table, the sequence of toast
+ * tuples that store the toasted value are retrieved and checked in order, with
+ * each toast tuple being checked against where we are in the sequence, as well
+ * as each toast tuple having its varlena structure sanity checked.
+ *
+ * Returns the size of the current toast tuple chunk, or zero if the chunk's
+ * owning value, sequence number, or size cannot be determined.
+ */
+static int32
+check_toast_tuple(HeapTuple toasttup, ToastedAttribute *ta,
+				  ToastCheckContext *tctx, HeapCheckContext *hctx)
+{
+	Oid			chunk_id;
+	int32		chunk_seq;
+	Pointer		chunk_data;
+	bool		id_isnull;
+	bool		seq_isnull;
+	bool		data_isnull;
+	uint32		chunksize = 0;	/* stays zero when the size is unknown */
+	bool		have_chunksize;
+	Oid			va_valueid = ta->toast_pointer.va_valueid;
+	uint32		va_header = 0;
+	bool		header_invalid = false;
+	bool		id_mismatch = false;
+
+	/* Have a chunk, extract the chunk id, sequence number, and data. */
+	chunk_id = DatumGetObjectId(fastgetattr(toasttup, 1,
+											hctx->toast_rel->rd_att,
+											&id_isnull));
+	chunk_seq = DatumGetInt32(fastgetattr(toasttup, 2, hctx->toast_rel->rd_att,
+										  &seq_isnull));
+	chunk_data = DatumGetPointer(fastgetattr(toasttup, 3,
+											 hctx->toast_rel->rd_att,
+											 &data_isnull));
+
+	/* Sanity check the chunk data and get the size. */
+	if (!data_isnull)
+	{
+		if (!VARATT_IS_EXTENDED(chunk_data))
+			chunksize = VARSIZE(chunk_data) - VARHDRSZ;
+		else if (VARATT_IS_SHORT(chunk_data))
+			chunksize = VARSIZE_SHORT(chunk_data) - VARHDRSZ_SHORT;
+		else
+		{
+			header_invalid = true;
+			va_header = ((varattrib_4b *) chunk_data)->va_4byte.va_header;
+		}
+	}
+	have_chunksize = !data_isnull && !header_invalid;
 
-	if (chunksize != expected_size)
-		report_toast_corruption(ctx, ta,
-								psprintf("toast value %u chunk %d has size %u, but expected size %u",
-										 ta->toast_pointer.va_valueid,
-										 chunkno, chunksize, expected_size));
+	/* The chunk_id should match this attribute's va_valueid. */
+	if (!id_isnull && chunk_id != va_valueid)
+		id_mismatch = true;
+
+	/*
+	 * The toast table should never contain null values, and the toast index
+	 * scan should never return chunks for values other than the one we
+	 * requested.  The data's varlena header should also be valid.
+	 *
+	 * If these expectations are violated in multiple ways, we cannot reliably
+	 * identify the chunk we are complaining about across multiple messages, so
+	 * we have to report all the problems in a single combined message.  (There
+	 * are specific examples below that we could break apart, but it hardly
+	 * seems worth doing so.)  Reporting each problem separately would create
+	 * ambiguity between corruptions occurring across successive chunks and
+	 * those same corruptions all in the same chunk.
+	 */
+	if (id_isnull && seq_isnull && data_isnull)
+		report_toast_corruption(hctx, ta, tctx,
+								psprintf("toast index scan for value %u returned toast chunk with null value, sequence number and data",
+										 va_valueid));
+	else if (id_mismatch && seq_isnull && data_isnull)
+		report_toast_corruption(hctx, ta, tctx,
+								psprintf("toast index scan for value %u returned toast chunk for value %u with null sequence number and data",
+										 va_valueid,
+										 chunk_id));
+	else if (id_isnull && seq_isnull && header_invalid)
+		report_toast_corruption(hctx, ta, tctx,
+								psprintf("toast index scan for value %u returned toast chunk with null value, null sequence number and invalid varlena header %0x",
+										 va_valueid,
+										 va_header));
+	else if (id_mismatch && seq_isnull && header_invalid)
+		report_toast_corruption(hctx, ta, tctx,
+								psprintf("toast index scan for value %u returned toast chunk for value %u, null sequence number and invalid varlena header %0x",
+										 va_valueid,
+										 chunk_id,
+										 va_header));
+	else if (id_isnull && seq_isnull)
+		report_toast_corruption(hctx, ta, tctx,
+								psprintf("toast index scan for value %u returned toast chunk with null value and sequence number",
+										 va_valueid));
+	else if (id_mismatch && seq_isnull)
+		report_toast_corruption(hctx, ta, tctx,
+								psprintf("toast index scan for value %u returned toast chunk for value %u with null sequence number",
+										 va_valueid,
+										 chunk_id));
+	else if (id_isnull && data_isnull)
+		report_toast_corruption(hctx, ta, tctx,
+								psprintf("toast index scan for value %u returned toast chunk %d with null value and data",
+										 va_valueid,
+										 chunk_seq));
+	else if (id_mismatch && data_isnull)
+		report_toast_corruption(hctx, ta, tctx,
+								psprintf("toast index scan for value %u returned toast chunk %d for value %u with null data",
+										 va_valueid,
+										 chunk_seq,
+										 chunk_id));
+	else if (id_isnull && header_invalid)
+		report_toast_corruption(hctx, ta, tctx,
+								psprintf("toast index scan for value %u returned toast chunk %d with null value and invalid varlena header %0x",
+										 va_valueid,
+										 chunk_seq,
+										 va_header));
+	else if (id_mismatch && header_invalid)
+		report_toast_corruption(hctx, ta, tctx,
+								psprintf("toast index scan for value %u returned toast chunk %d for value %u with invalid varlena header %0x",
+										 va_valueid,
+										 chunk_seq,
+										 chunk_id,
+										 va_header));
+	else if (id_isnull)
+		report_toast_corruption(hctx, ta, tctx,
+								psprintf("toast index scan for value %u returned toast chunk %d with null value",
+										 va_valueid,
+										 chunk_seq));
+	else if (id_mismatch)
+		report_toast_corruption(hctx, ta, tctx,
+								psprintf("toast index scan for value %u returned toast chunk %d for value %u",
+										 va_valueid,
+										 chunk_seq,
+										 chunk_id));
+	else if (seq_isnull && data_isnull)
+		report_toast_corruption(hctx, ta, tctx,
+								psprintf("toast index scan for value %u returned toast chunk with null sequence number and data",
+										 va_valueid));
+	else if (seq_isnull && header_invalid)
+		report_toast_corruption(hctx, ta, tctx,
+								psprintf("toast index scan for value %u returned toast chunk with null sequence number and invalid varlena header %0x",
+										 va_valueid,
+										 va_header));
+	else if (seq_isnull)
+		report_toast_corruption(hctx, ta, tctx,
+								psprintf("toast index scan for value %u returned toast chunk with null sequence number",
+										 va_valueid));
+	else if (data_isnull)
+		report_toast_corruption(hctx, ta, tctx,
+								psprintf("toast index scan for value %u returned toast chunk %d with null data",
+										 va_valueid,
+										 chunk_seq));
+	else if (header_invalid)
+		report_toast_corruption(hctx, ta, tctx,
+								psprintf("toast index scan for value %u returned toast chunk %d with invalid varlena header %0x",
+										 va_valueid,
+										 chunk_seq,
+										 va_header));
+
+	/*
+	 * Count every chunk the scan returns, however corrupt.  The remaining
+	 * checks concern this chunk's place in the attribute's chunk sequence; if
+	 * it does not properly belong to the attribute or has a null chunk_seq
+	 * value, we cannot perform such checks, so we're done.
+	 */
+	tctx->total_chunks++;
+	if (id_isnull || id_mismatch || seq_isnull)
+		return 0;
+
+	/*
+	 * Assuming the chunk_seq values are being returned to us in the correct
+	 * order, complain if this chunk_seq indicates that any expected chunks
+	 * have been skipped.  Note that if the skipped chunks are later returned,
+	 * an additional report about the misordering will be issued.
+	 */
+	if (!tctx->chunk_seq_seen && chunk_seq > 0)
+		report_missing_chunks(hctx, ta, tctx, 0, chunk_seq-1);
+	else if (tctx->chunk_seq_seen && chunk_seq > tctx->last_chunk_seq+1)
+		report_missing_chunks(hctx, ta, tctx, tctx->last_chunk_seq+1, chunk_seq-1);
+
+	/* Complain if the chunk sequence number retreats. */
+	else if (tctx->chunk_seq_seen && chunk_seq < tctx->last_chunk_seq)
+		report_toast_corruption(hctx, ta, tctx,
+								psprintf("toast value %u index scan returned chunk %d after chunk %d",
+										 va_valueid,
+										 chunk_seq, tctx->last_chunk_seq));
+
+	/* Complain if the same chunk sequence number is returned multiple times. */
+	else if (tctx->chunk_seq_seen && chunk_seq == tctx->last_chunk_seq)
+		report_toast_corruption(hctx, ta, tctx,
+								psprintf("toast value %u index scan returned duplicate chunk %d",
+										 va_valueid,
+										 chunk_seq));
+
+	/* Report an extraneous chunk outside the expected sequence. */
+	if (chunk_seq < 0 || chunk_seq > tctx->final_expected_chunk)
+		handle_extraneous_chunk(hctx, ta, tctx, chunk_seq, chunksize,
+								have_chunksize);
+
+	/* Report a partial chunk before the final expected chunk. */
+	else if (have_chunksize && chunk_seq < tctx->final_expected_chunk &&
+			 chunksize != TOAST_MAX_CHUNK_SIZE)
+		report_toast_corruption(hctx, ta, tctx,
+								psprintf("toast value %u chunk %d has size %u, but expected chunk with size %u",
+										 va_valueid,
+										 chunk_seq, chunksize,
+										 (unsigned)TOAST_MAX_CHUNK_SIZE));
+
+	/* Report a final chunk of the wrong size. */
+	else if (have_chunksize && chunk_seq == tctx->final_expected_chunk &&
+			 chunksize != tctx->final_expected_size)
+		report_toast_corruption(hctx, ta, tctx,
+								psprintf("toast value %u chunk %d has size %u, but expected chunk with size %u",
+										 va_valueid,
+										 chunk_seq, chunksize,
+										 tctx->final_expected_size));
+
+	/* Remember that we have seen this chunk for next time. */
+	tctx->chunk_seq_seen = true;
+	tctx->last_chunk_seq = chunk_seq;
+
+	return chunksize;
 }
 
 /*
@@ -1379,14 +1753,55 @@ check_tuple_attribute(HeapCheckContext *ctx)
 	 */
 	VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
 
+	/* Oversized toasted attributes should never be stored */
+	if (toast_pointer.va_rawsize > VARLENA_SIZE_LIMIT)
+		report_corruption(ctx,
+						  psprintf("toast value %u rawsize %u exceeds limit %u",
+								   toast_pointer.va_valueid,
+								   toast_pointer.va_rawsize,
+								   VARLENA_SIZE_LIMIT));
+
+	/* Compression should never expand the attribute */
+	if (VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer) > toast_pointer.va_rawsize - VARHDRSZ)
+		report_corruption(ctx,
+						  psprintf("toast value %u external size %u exceeds maximum expected for rawsize %u",
+								   toast_pointer.va_valueid,
+								   VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer),
+								   toast_pointer.va_rawsize));
+
+	/* Compressed attributes should have a valid compression method */
+	if (VARATT_IS_COMPRESSED(&toast_pointer))
+	{
+		ToastCompressionId cmid;
+		bool		valid = false;
+
+		cmid = TOAST_COMPRESS_METHOD(&toast_pointer);
+		switch (cmid)
+		{
+			/* List of all valid compression method IDs */
+			case TOAST_PGLZ_COMPRESSION_ID:
+			case TOAST_LZ4_COMPRESSION_ID:
+				valid = true;
+				break;
+
+			/* Recognized but invalid compression method ID */
+			case TOAST_INVALID_COMPRESSION_ID:
+				break;
+
+			/* Intentionally no default here */
+		}
+
+		if (!valid)
+			report_corruption(ctx,
+							  psprintf("toast value %u has invalid compression method id %d",
+									   toast_pointer.va_valueid, cmid));
+	}
+
 	/* The tuple header better claim to contain toasted values */
 	if (!(infomask & HEAP_HASEXTERNAL))
-	{
 		report_corruption(ctx,
 						  psprintf("toast value %u is external but tuple header flag HEAP_HASEXTERNAL not set",
 								   toast_pointer.va_valueid));
-		return true;
-	}
 
 	/* The relation better have a toast table */
 	if (!ctx->rel->rd_rel->reltoastrelid)
@@ -1397,6 +1812,14 @@ check_tuple_attribute(HeapCheckContext *ctx)
 		return true;
 	}
 
+	/* The toast pointer had better point at the relation's toast table */
+	if (toast_pointer.va_toastrelid != ctx->rel->rd_rel->reltoastrelid)
+		report_corruption(ctx,
+						  psprintf("toast value %u toast relation oid %u differs from expected oid %u",
+								   toast_pointer.va_valueid,
+								   toast_pointer.va_toastrelid,
+								   ctx->rel->rd_rel->reltoastrelid));
+
 	/* If we were told to skip toast checking, then we're done. */
 	if (ctx->toast_rel == NULL)
 		return true;
@@ -1436,10 +1859,22 @@ check_toasted_attribute(HeapCheckContext *ctx, ToastedAttribute *ta)
 	SysScanDesc toastscan;
 	bool		found_toasttup;
 	HeapTuple	toasttup;
-	int32		chunkno;
-	int32		endchunk;
-
-	endchunk = (VARATT_EXTERNAL_GET_EXTSIZE(ta->toast_pointer) - 1) / TOAST_MAX_CHUNK_SIZE;
+	int64		totalsize;		/* corrupt toast could overflow 32 bits */
+	int32		extsize;
+	ToastCheckContext tctx;
+
+	/* Calculate expected number of chunks and size of final chunk */
+	extsize = VARATT_EXTERNAL_GET_EXTSIZE(ta->toast_pointer);
+	tctx.final_expected_chunk = (extsize - 1) / TOAST_MAX_CHUNK_SIZE;
+	tctx.final_expected_size = extsize - tctx.final_expected_chunk * TOAST_MAX_CHUNK_SIZE;
+
+	/* Have not yet seen any chunks for this toast tuple */
+	tctx.have_extraneous_chunks = false;
+	tctx.first_extraneous = -1;
+	tctx.last_extraneous = -1;
+	tctx.chunk_seq_seen = false;
+	tctx.last_chunk_seq = -1;
+	tctx.total_chunks = 0;
 
 	/*
 	 * Setup a scan key to find chunks in toast table with matching va_valueid
@@ -1458,27 +1893,59 @@ check_toasted_attribute(HeapCheckContext *ctx, ToastedAttribute *ta)
 										   ctx->valid_toast_index,
 										   &SnapshotToast, 1,
 										   &toastkey);
-	chunkno = 0;
+	totalsize = 0;
 	found_toasttup = false;
 	while ((toasttup =
 			systable_getnext_ordered(toastscan,
 									 ForwardScanDirection)) != NULL)
 	{
+		totalsize += check_toast_tuple(toasttup, ta, &tctx, ctx);
 		found_toasttup = true;
-		check_toast_tuple(toasttup, ctx, ta, chunkno, endchunk);
-		chunkno++;
 	}
 	systable_endscan_ordered(toastscan);
 
 	if (!found_toasttup)
-		report_toast_corruption(ctx, ta,
+	{
+		report_toast_corruption(ctx, ta, &tctx,
 								psprintf("toast value %u not found in toast table",
 										 ta->toast_pointer.va_valueid));
-	else if (chunkno != (endchunk + 1))
-		report_toast_corruption(ctx, ta,
-								psprintf("toast value %u was expected to end at chunk %d, but ended at chunk %d",
+		return;
+	}
+
+	/* Flush any cached extraneous chunks seen in the loop above. */
+	report_extraneous_chunks(ctx, ta, &tctx);
+
+	/*
+	 * Any chunks missing from the beginning or middle of the sequence were
+	 * already reported within check_toast_tuple(), but we need to report
+	 * any chunks missing from the end of the sequence.
+	 */
+	if (tctx.last_chunk_seq < tctx.final_expected_chunk)
+		report_missing_chunks(ctx, ta, &tctx, tctx.last_chunk_seq+1,
+							  tctx.final_expected_chunk);
+
+	/*
+	 * Report a summary message for this toasted attribute if the size and
+	 * structure of the attribute in its totality differs from our
+	 * expectations.
+	 */
+	if (!tctx.chunk_seq_seen)
+		report_toast_corruption(ctx, ta, &tctx,
+								psprintf(ngettext("toast value %u was expected to end with chunk %d and total size %d but ends after %d chunk with null sequence number",
+												  "toast value %u was expected to end with chunk %d and total size %d but ends after %d chunks with null sequence number",
+												  tctx.total_chunks),
+										 ta->toast_pointer.va_valueid,
+										 tctx.final_expected_chunk, extsize,
+										 tctx.total_chunks));
+	else if (extsize != totalsize || tctx.final_expected_chunk != tctx.last_chunk_seq)
+		report_toast_corruption(ctx, ta, &tctx,
+								psprintf(ngettext("toast value %u was expected to end with chunk %d and total size %d but ends after %d chunk with chunk %d and total size " INT64_FORMAT,
+												  "toast value %u was expected to end with chunk %d and total size %d but ends after %d chunks with chunk %d and total size " INT64_FORMAT,
+												  tctx.total_chunks),
 										 ta->toast_pointer.va_valueid,
-										 (endchunk + 1), chunkno));
+										 tctx.final_expected_chunk, extsize,
+										 tctx.total_chunks,
+										 tctx.last_chunk_seq, totalsize));
 }
 
 /*
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index c7aff677d4..996ef03180 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -2558,6 +2558,7 @@ TimestampTz
 TmFromChar
 TmToChar
 ToastAttrInfo
+ToastCheckContext
 ToastTupleContext
 ToastedAttribute
 TocEntry
-- 
2.21.1 (Apple Git-122.3)

