Hi hackers!
This proposal aims to provide the ability to de-TOAST a fully TOAST'd and
compressed field using an iterator and then update the appropriate parts of
the code to use the iterator where possible instead of de-TOAST'ing and
de-compressing the entire value. Examples where this can be helpful include
using position() from the beginning of the value, or doing a pattern or
substring match.

de-TOAST iterator overview:
1. The caller requests the slice of the attribute value from the de-TOAST
iterator.
2. The de-TOAST iterator checks whether the requested slice is already
available in the output buffer; if it is, the result is returned directly,
    otherwise go to step 3.
3. The de-TOAST iterator checks whether the slice is available in the
input buffer; if it is, go to step 4. Otherwise,
    call fetch_datum_iterate to fetch datums from disk into the input buffer.
4. If the data in the input buffer is compressed, extract some data from
the input buffer to the output buffer until the caller's
    needs are met.

I've implemented a prototype and applied it to the position() function to
test performance.
Test tables:
-----------------------------------------------------------------------------------------------------
create table detoast_c (id serial primary key,
a text
);
insert into detoast_c (a) select
repeat('1234567890-=abcdefghijklmnopqrstuvwxyz', 1000000)||'321' as a from
generate_series(1,100);

create table detoast_u (id serial primary key,
a text
);
alter table detoast_u alter a set storage external;
insert into detoast_u (a) select
repeat('1234567890-=abcdefghijklmnopqrstuvwxyz', 1000000)||'321' as a from
generate_series(1,100);
**************************************************************************************
-----------------------------------------------------------------------------------------------------
                         query                                    |
 master (ms)  |  patch  (ms)  |
-----------------------------------------------------------------------------------------------------
select position('123' in a) from detoast_c;    |     4054.838       |
1440.735   |
-----------------------------------------------------------------------------------------------------
select position('321' in a) from detoast_c;    |     25549.270     |
 27696.245  |
-----------------------------------------------------------------------------------------------------
select position('123' in a) from detoast_u;    |     8116.996       |
1386.802   |
-----------------------------------------------------------------------------------------------------
select position('321' in a) from detoast_u     |     28442.116     |
 27672.319  |
-----------------------------------------------------------------------------------------------------
**************************************************************************************
It can be seen that the iterator greatly improves the efficiency of partial
de-TOAST while causing almost no degradation in full de-TOAST efficiency.
Next, I will continue to study how to apply iterators to more queries
and improve iterator efficiency, such as using macros instead of function
calls.

The patch is also available on github[1].
Any suggestions or comments would be much appreciated:)

Best regards, Binguo Bao.

[1] https://github.com/djydewang/postgres/pull/1/files
From b071bf9801f45d5ff48422b44bd5042ae19ea20c Mon Sep 17 00:00:00 2001
From: BBG <djydew...@gmail.com>
Date: Tue, 4 Jun 2019 22:56:42 +0800
Subject: [PATCH] de-TOASTing using a iterator

---
 src/backend/access/heap/tuptoaster.c | 488 +++++++++++++++++++++++++++++++++++
 src/backend/utils/adt/varlena.c      |  48 ++--
 src/include/access/tuptoaster.h      |  92 +++++++
 3 files changed, 612 insertions(+), 16 deletions(-)

diff --git a/src/backend/access/heap/tuptoaster.c b/src/backend/access/heap/tuptoaster.c
index 55d6e91..92dc87a 100644
--- a/src/backend/access/heap/tuptoaster.c
+++ b/src/backend/access/heap/tuptoaster.c
@@ -83,6 +83,13 @@ static int	toast_open_indexes(Relation toastrel,
 static void toast_close_indexes(Relation *toastidxs, int num_indexes,
 								LOCKMODE lock);
 static void init_toast_snapshot(Snapshot toast_snapshot);
+static FetchDatumIterator create_fetch_datum_iterator(struct varlena *attr);
+static bool free_fetch_datum_iterator(FetchDatumIterator iter);
+static int32 fetch_datum_iterate(FetchDatumIterator iter);
+static void init_toast_buffer(ToastBuffer *buf, int32 size, bool compressed);
+static bool free_toast_buffer(ToastBuffer *buf);
+static int32 pglz_decompress_iterate(ToastBuffer *source, ToastBuffer *dest,
+									 DetoastIterator iter, int32 need_len);
 
 
 /* ----------
@@ -347,6 +354,145 @@ heap_tuple_untoast_attr_slice(struct varlena *attr,
 
 
 /* ----------
+ * create_detoast_iterator -
+ *
+ * Initialize detoast iterator.
+ * ----------
+ */
+DetoastIterator create_detoast_iterator(struct varlena *attr) {
+	struct varatt_external toast_pointer;
+	DetoastIterator iterator = NULL;
+	if (VARATT_IS_EXTERNAL_ONDISK(attr))
+	{
+		/*
+		 * Externally stored datum --- chunks come from a fetch datum iterator
+		 */
+		iterator = (DetoastIterator) palloc0(sizeof(DetoastIteratorData));
+		iterator->fetch_datum_iterator = create_fetch_datum_iterator(attr);
+		VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
+		if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
+		{
+			/* Compressed: decompress into a separate buffer of va_rawsize */
+			iterator->buf = (ToastBuffer *) palloc0(sizeof(ToastBuffer));
+			init_toast_buffer(iterator->buf, toast_pointer.va_rawsize, false);
+			iterator->source = NULL;
+			iterator->ctrlc = 0;
+			iterator->compressed = true;
+			iterator->done = false;
+		}
+		else
+		{
+			iterator->buf = iterator->fetch_datum_iterator->buf;	/* shared, not copied */
+			iterator->source = NULL;
+			iterator->ctrlc = 0;
+			iterator->compressed = false;
+			iterator->done = false;
+		}
+	}
+	else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
+	{
+		/*
+		 * This is an indirect pointer --- dereference it
+		 */
+		struct varatt_indirect redirect;
+
+		VARATT_EXTERNAL_GET_POINTER(redirect, attr);
+		attr = (struct varlena *) redirect.pointer;
+
+		/* nested indirect Datums aren't allowed */
+		Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr));
+
+		/* recurse: the target may be on disk, compressed, or neither (NULL) */
+		iterator = create_detoast_iterator(attr);
+
+	}
+	else if (VARATT_IS_COMPRESSED(attr))
+	{
+		/*
+		 * Compressed value inside the main tuple --- no fetch iterator needed
+		 */
+		iterator = (DetoastIterator) palloc0(sizeof(DetoastIteratorData));
+		iterator->fetch_datum_iterator = NULL;
+		iterator->source = palloc0(sizeof(ToastBuffer));
+		iterator->source->buf = (const char*) attr;	/* aliases attr, no copy */
+		iterator->source->position = TOAST_COMPRESS_RAWDATA(attr);
+		iterator->source->limit = (char *)attr + VARSIZE(attr);
+		iterator->source->capacity = iterator->source->limit;	/* input is complete */
+
+		iterator->buf = palloc0(sizeof(ToastBuffer));
+		init_toast_buffer(iterator->buf, TOAST_COMPRESS_RAWSIZE(attr) + VARHDRSZ, false);
+
+		iterator->ctrlc = 0;
+		iterator->compressed = true;
+		iterator->done = false;
+	}
+
+	return iterator;	/* still NULL when attr matched none of the cases above */
+}
+
+
+/* ----------
+ * free_detoast_iterator -
+ *
+ * Free the memory space occupied by the de-Toast iterator.
+ * ----------
+ */
+bool free_detoast_iterator(DetoastIterator iter) {
+	if (iter == NULL)
+		return false;
+	/* buf is shared with the fetch iterator for uncompressed external data */
+	if (iter->fetch_datum_iterator == NULL ||
+		iter->buf != iter->fetch_datum_iterator->buf)
+		free_toast_buffer(iter->buf);
+	free_fetch_datum_iterator(iter->fetch_datum_iterator);
+	/* source->buf aliases the caller's datum: release only the wrapper */
+	if (iter->source != NULL)
+		pfree(iter->source);
+	pfree(iter);
+	return true;
+}
+
+
+/* ----------
+ * detoast_iterate -
+ *
+ * Iterate through the toasted value referenced by iterator.
+ *
+ * As long as there is another slice in compression or external storage,
+ * detoast it into toast buffer in iterator, and return available slice length.
+ * Return -1 when no more data.
+ * ----------
+ */
+extern int32 detoast_iterate(int32 length, DetoastIterator iter)
+{
+	if (iter == NULL)
+		elog(ERROR, "detoast_iterate shouldn't be called for NULL iterator");
+
+	/* Enough bytes are already buffered, or the iterator is marked done */
+	if (iter->buf->limit - iter->buf->position >= length || iter->done)
+		return iter->buf->limit - iter->buf->position;
+
+	if (iter->fetch_datum_iterator != NULL)
+	{
+		FetchDatumIterator fetch_iter = iter->fetch_datum_iterator;
+		ToastBuffer *buf = fetch_iter->buf;
+
+		/* External datum: pull chunks until enough input is buffered */
+		while (buf->limit - buf->position < length && !fetch_iter->done)
+			fetch_datum_iterate(fetch_iter);
+
+		/* Compressed input still has to be expanded into iter->buf */
+		if (iter->compressed)
+			return pglz_decompress_iterate(buf, iter->buf, iter, length);
+		return iter->buf->limit - iter->buf->position;
+	}
+
+	/* Inline compressed datum: decompress straight from iter->source */
+	return pglz_decompress_iterate(iter->source, iter->buf, iter, length);
+}
+
+
+/* ----------
  * toast_raw_datum_size -
  *
  *	Return the raw (detoasted) size of a varlena datum
@@ -2409,3 +2555,345 @@ init_toast_snapshot(Snapshot toast_snapshot)
 
 	InitToastSnapshot(*toast_snapshot, snapshot->lsn, snapshot->whenTaken);
 }
+
+
+/* ----------
+ * create_fetch_datum_iterator -
+ *
+ * Initialize fetch datum iterator.
+ * ----------
+ */
+static FetchDatumIterator
+create_fetch_datum_iterator(struct varlena *attr) {
+	int			validIndex;
+	FetchDatumIterator iterator;
+
+	if (!VARATT_IS_EXTERNAL_ONDISK(attr))
+		elog(ERROR, "create_fetch_datum_iterator shouldn't be called for non-ondisk datums");
+
+	iterator = (FetchDatumIterator) palloc0(sizeof(FetchDatumIteratorData));
+
+	/* Must copy to access aligned fields */
+	VARATT_EXTERNAL_GET_POINTER(iterator->toast_pointer, attr);
+
+	iterator->ressize = iterator->toast_pointer.va_extsize;
+	iterator->numchunks = ((iterator->ressize - 1) / TOAST_MAX_CHUNK_SIZE) + 1;
+
+	/*
+	 * Open the toast relation and its indexes
+	 */
+	iterator->toastrel = table_open(iterator->toast_pointer.va_toastrelid, AccessShareLock);
+
+	/* Look for the valid index of the toast relation */
+	validIndex = toast_open_indexes(iterator->toastrel,
+									AccessShareLock,
+									&iterator->toastidxs,
+									&iterator->num_indexes);
+
+	/*
+	 * Setup a scan key to fetch from the index by va_valueid
+	 */
+	ScanKeyInit(&iterator->toastkey,
+				(AttrNumber) 1,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(iterator->toast_pointer.va_valueid));
+
+	/*
+	 * Begin the index scan; chunks are read later by fetch_datum_iterate()
+	 *
+	 * Note that because the index is actually on (valueid, chunkidx) we will
+	 * see the chunks in chunkidx order, even though we didn't explicitly ask
+	 * for it.
+	 */
+	init_toast_snapshot(&iterator->SnapshotToast);
+	iterator->toastscan = systable_beginscan_ordered(iterator->toastrel, iterator->toastidxs[validIndex],
+										   &iterator->SnapshotToast, 1, &iterator->toastkey);
+
+	iterator->buf = (ToastBuffer *) palloc0(sizeof(ToastBuffer));
+	init_toast_buffer(iterator->buf, iterator->ressize + VARHDRSZ, VARATT_EXTERNAL_IS_COMPRESSED(iterator->toast_pointer));
+
+	iterator->nextidx = 0;
+	iterator->done = false;
+
+	return iterator;
+}
+
+static bool
+free_fetch_datum_iterator(FetchDatumIterator iter)
+{
+	if (iter == NULL)
+		return false;
+
+	/* A scan that ran to completion was already closed by fetch_datum_iterate */
+	if (!iter->done)
+	{
+		systable_endscan_ordered(iter->toastscan);
+		toast_close_indexes(iter->toastidxs, iter->num_indexes, AccessShareLock);
+		table_close(iter->toastrel, AccessShareLock);
+	}
+
+	free_toast_buffer(iter->buf);
+	pfree(iter);
+	return true;
+}
+
+/* ----------
+ * fetch_datum_iterate -
+ *
+ * Iterate through the toasted value referenced by iterator.
+ *
+ * As long as there is another chunk data in compression or external storage,
+ * fetch it into buffer in iterator, and return slice length.
+ * Return -1 when no more data.
+ * ----------
+ */
+static int32
+fetch_datum_iterate(FetchDatumIterator iter) {
+	HeapTuple	ttup;
+	TupleDesc	toasttupDesc;
+	int32		residx;
+	Pointer		chunk;
+	bool		isnull;
+	char		*chunkdata;
+	int32		chunksize;
+
+	if (iter == NULL)
+	{
+		elog(ERROR, "fetch_datum_iterate shouldn't be called for NULL iterator");
+	}
+
+	if (iter->done)
+	{
+		return -1;		/* scan already exhausted and closed */
+	}
+
+	ttup = systable_getnext_ordered(iter->toastscan, ForwardScanDirection);
+	if (ttup == NULL)
+	{
+		/*
+		 * No more chunks: check that every expected chunk was seen
+		 */
+		if (iter->nextidx != iter->numchunks)
+			elog(ERROR, "missing chunk number %d for toast value %u in %s",
+				 iter->nextidx,
+				 iter->toast_pointer.va_valueid,
+				 RelationGetRelationName(iter->toastrel));
+
+		/*
+		 * End scan and close relations
+		 */
+		systable_endscan_ordered(iter->toastscan);
+		toast_close_indexes(iter->toastidxs, iter->num_indexes, AccessShareLock);
+		table_close(iter->toastrel, AccessShareLock);
+
+		iter->done = true;
+		return -1;
+	}
+
+	/*
+	 * Have a chunk, extract the sequence number and the data
+	 */
+	toasttupDesc = iter->toastrel->rd_att;
+	residx = DatumGetInt32(fastgetattr(ttup, 2, toasttupDesc, &isnull));
+	Assert(!isnull);
+	chunk = DatumGetPointer(fastgetattr(ttup, 3, toasttupDesc, &isnull));
+	Assert(!isnull);
+	if (!VARATT_IS_EXTENDED(chunk))
+	{
+		chunksize = VARSIZE(chunk) - VARHDRSZ;
+		chunkdata = VARDATA(chunk);
+	}
+	else if (VARATT_IS_SHORT(chunk))
+	{
+		/* could happen due to heap_form_tuple doing its thing */
+		chunksize = VARSIZE_SHORT(chunk) - VARHDRSZ_SHORT;
+		chunkdata = VARDATA_SHORT(chunk);
+	}
+	else
+	{
+		/* should never happen */
+		elog(ERROR, "found toasted toast chunk for toast value %u in %s",
+			 iter->toast_pointer.va_valueid,
+			 RelationGetRelationName(iter->toastrel));
+		chunksize = 0;		/* keep compiler quiet */
+		chunkdata = NULL;
+	}
+
+	/*
+	 * Sanity checks: chunk order, and chunk size for its position
+	 */
+	if (residx != iter->nextidx)
+		elog(ERROR, "unexpected chunk number %d (expected %d) for toast value %u in %s",
+			 residx, iter->nextidx,
+			 iter->toast_pointer.va_valueid,
+			 RelationGetRelationName(iter->toastrel));
+	if (residx < iter->numchunks - 1)
+	{
+		if (chunksize != TOAST_MAX_CHUNK_SIZE)
+			elog(ERROR, "unexpected chunk size %d (expected %d) in chunk %d of %d for toast value %u in %s",
+				 chunksize, (int) TOAST_MAX_CHUNK_SIZE,
+				 residx, iter->numchunks,
+				 iter->toast_pointer.va_valueid,
+				 RelationGetRelationName(iter->toastrel));
+	}
+	else if (residx == iter->numchunks - 1)
+	{
+		if ((residx * TOAST_MAX_CHUNK_SIZE + chunksize) != iter->ressize)
+			elog(ERROR, "unexpected chunk size %d (expected %d) in final chunk %d for toast value %u in %s",
+				 chunksize,
+				 (int) (iter->ressize - residx * TOAST_MAX_CHUNK_SIZE),
+				 residx,
+				 iter->toast_pointer.va_valueid,
+				 RelationGetRelationName(iter->toastrel));
+	}
+	else
+		elog(ERROR, "unexpected chunk number %d (out of range %d..%d) for toast value %u in %s",
+			 residx,
+			 0, iter->numchunks - 1,
+			 iter->toast_pointer.va_valueid,
+			 RelationGetRelationName(iter->toastrel));
+
+	/*
+	 * Append the chunk at the buffer's producer cursor and advance it
+	 */
+	memcpy(iter->buf->limit, chunkdata, chunksize);
+	iter->buf->limit += chunksize;
+
+	iter->nextidx++;
+	return chunksize;
+}
+
+
+static void
+init_toast_buffer(ToastBuffer *buf, int32 size, bool compressed) {
+	/* Zero-filled backing store of 'size' bytes */
+	buf->buf = (const char *) palloc0(size);
+	buf->buf_size = size;
+	buf->capacity = buf->buf + size;
+	/* Stamp the length word in the form matching the payload kind */
+	if (compressed)
+		SET_VARSIZE_COMPRESSED(buf->buf, size);
+	else
+		SET_VARSIZE(buf->buf, size);
+
+	/* position is the consumer cursor, limit the producer cursor */
+	buf->position = compressed ? VARDATA_4B_C(buf->buf) : VARDATA_4B(buf->buf);
+	buf->limit = VARDATA(buf->buf);
+}
+
+
+static bool
+free_toast_buffer(ToastBuffer *buf)
+{
+	/* Returns false for a NULL buffer, true once both allocations are freed */
+	if (buf == NULL)
+		return false;
+
+	pfree((void *) buf->buf);
+	pfree(buf);
+	return true;
+}
+
+
+/* ----------
+ * pglz_decompress_iterate -
+ *
+ *		Decompresses source into dest. Returns the number of available bytes
+ *		decompressed in the destination buffer.
+ * ----------
+ */
+static int32
+pglz_decompress_iterate(ToastBuffer *source, ToastBuffer *dest, DetoastIterator iter, int32 need_len)
+{
+	const unsigned char *sp = (const unsigned char *) source->position;
+	const unsigned char *srcend = (const unsigned char *) source->limit;
+	unsigned char *dp = (unsigned char *) dest->limit;
+	unsigned char *destend = (unsigned char *) dest->capacity;
+	/* limit short of capacity means the producer may still append input */
+	bool		more_input = (source->limit != source->capacity);
+	bool		starved = false;
+
+	while (!starved && sp < srcend && dp < destend &&
+		   ((char *) dp - dest->position) < need_len)
+	{
+		/* Resume the control byte saved by the previous call, or read one */
+		unsigned char ctrl;
+		int			ctrlc;
+
+		if (iter->ctrlc != 0)
+		{
+			ctrl = iter->ctrl;
+			ctrlc = iter->ctrlc;
+		}
+		else
+		{
+			ctrl = *sp++;
+			ctrlc = 0;
+		}
+
+		for (; ctrlc < 8 && sp < srcend && dp < destend; ctrlc++)
+		{
+			if (ctrl & 1)
+			{
+				/*
+				 * A set control bit means MATCH TAG: the first byte holds
+				 * the match length minus 3 and the upper 4 bits of the
+				 * offset, the next byte the lower 8 bits of the offset. If
+				 * the length is coded as 18, another extension byte tells
+				 * how much longer the match really was (0-255).
+				 */
+				int32		len;
+				int32		off;
+
+				/* A tag spans up to 3 bytes; if the input may still grow and
+				 * the tag could be cut off, wait instead of reading ahead. */
+				if (more_input && srcend - sp < 3)
+				{
+					starved = true;
+					break;
+				}
+
+				len = (sp[0] & 0x0f) + 3;
+				off = ((sp[0] & 0xf0) << 4) | sp[1];
+				sp += 2;
+				if (len == 18)
+					len += *sp++;
+
+				/* Copy byte by byte from OUTPUT to OUTPUT: the areas may
+				 * overlap, so memcpy() must not be used here. */
+				len = Min(len, destend - dp);
+				while (len--)
+				{
+					*dp = dp[-off];
+					dp++;
+				}
+			}
+			else
+			{
+				/* An unset control bit means LITERAL BYTE: copy it over */
+				*dp++ = *sp++;
+			}
+
+			/* Advance the control bit */
+			ctrl >>= 1;
+		}
+
+		if (ctrlc == 0)
+		{
+			/* Control byte read but no item processed: re-read it next call */
+			sp--;
+			starved = true;
+		}
+		else if (ctrlc < 8)
+		{
+			iter->ctrlc = ctrlc;
+			iter->ctrl = ctrl;
+		}
+		else
+			iter->ctrlc = 0;
+	}
+
+	source->position = (char *) sp;
+	dest->limit = (char *) dp;
+	return dest->limit - dest->position;
+}
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index 0864838..e7fab58 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -122,10 +122,10 @@ static text *text_substring(Datum str,
 							int32 length,
 							bool length_not_specified);
 static text *text_overlay(text *t1, text *t2, int sp, int sl);
-static int	text_position(text *t1, text *t2, Oid collid);
+static int	text_position(text *t1, text *t2, Oid collid, DetoastIterator iter);
 static void text_position_setup(text *t1, text *t2, Oid collid, TextPositionState *state);
-static bool text_position_next(TextPositionState *state);
-static char *text_position_next_internal(char *start_ptr, TextPositionState *state);
+static bool text_position_next(TextPositionState *state, DetoastIterator iter);
+static char *text_position_next_internal(char *start_ptr, TextPositionState *state, DetoastIterator iter);
 static char *text_position_get_match_ptr(TextPositionState *state);
 static int	text_position_get_match_pos(TextPositionState *state);
 static void text_position_cleanup(TextPositionState *state);
@@ -1092,10 +1092,19 @@ text_overlay(text *t1, text *t2, int sp, int sl)
 Datum
 textpos(PG_FUNCTION_ARGS)
 {
-	text	   *str = PG_GETARG_TEXT_PP(0);
+	text	   *str;
 	text	   *search_str = PG_GETARG_TEXT_PP(1);
+	DetoastIterator iter = create_detoast_iterator((struct varlena *) DatumGetPointer(PG_GETARG_DATUM(0)));
 
+	/*
+	 * create_detoast_iterator() returns NULL when it does not handle this
+	 * datum; in that case fall back to fetching the whole value up front.
+	 */
+	if (iter != NULL)
+		str = (text *) iter->buf->buf;
+	else
+		str = PG_GETARG_TEXT_PP(0);
-	PG_RETURN_INT32((int32) text_position(str, search_str, PG_GET_COLLATION()));
+	PG_RETURN_INT32((int32) text_position(str, search_str, PG_GET_COLLATION(), iter));
 }
 
 /*
@@ -1113,7 +1122,7 @@ textpos(PG_FUNCTION_ARGS)
  *	functions.
  */
 static int
-text_position(text *t1, text *t2, Oid collid)
+text_position(text *t1, text *t2, Oid collid, DetoastIterator iter)
 {
 	TextPositionState state;
 	int			result;
@@ -1122,15 +1131,15 @@ text_position(text *t1, text *t2, Oid collid)
 		return 0;
 
 	text_position_setup(t1, t2, collid, &state);
-	if (!text_position_next(&state))
+	if (!text_position_next(&state, iter))
 		result = 0;
 	else
 		result = text_position_get_match_pos(&state);
 	text_position_cleanup(&state);
+	free_detoast_iterator(iter);
 	return result;
 }
 
-
 /*
  * text_position_setup, text_position_next, text_position_cleanup -
  *	Component steps of text_position()
@@ -1274,7 +1283,7 @@ text_position_setup(text *t1, text *t2, Oid collid, TextPositionState *state)
  * is found.
  */
 static bool
-text_position_next(TextPositionState *state)
+text_position_next(TextPositionState *state, DetoastIterator iter)
 {
 	int			needle_len = state->len2;
 	char	   *start_ptr;
@@ -1290,7 +1299,7 @@ text_position_next(TextPositionState *state)
 		start_ptr = state->str1;
 
 retry:
-	matchptr = text_position_next_internal(start_ptr, state);
+	matchptr = text_position_next_internal(start_ptr, state, iter);
 
 	if (!matchptr)
 		return false;
@@ -1338,7 +1347,7 @@ retry:
  * match starting at 'start_ptr', or NULL if no match is found.
  */
 static char *
-text_position_next_internal(char *start_ptr, TextPositionState *state)
+text_position_next_internal(char *start_ptr, TextPositionState *state, DetoastIterator iter)
 {
 	int			haystack_len = state->len1;
 	int			needle_len = state->len2;
@@ -1358,6 +1367,9 @@ text_position_next_internal(char *start_ptr, TextPositionState *state)
 		hptr = start_ptr;
 		while (hptr < haystack_end)
 		{
+			if (iter != NULL) {
+				detoast_iterate(hptr - iter->buf->position + 1, iter);
+			}
 			if (*hptr == nchar)
 				return (char *) hptr;
 			hptr++;
@@ -1371,6 +1383,10 @@ text_position_next_internal(char *start_ptr, TextPositionState *state)
 		hptr = start_ptr + needle_len - 1;
 		while (hptr < haystack_end)
 		{
+			if (iter != NULL) {
+				detoast_iterate(hptr - iter->buf->position + 1, iter);
+			}
+
 			/* Match the needle scanning *backward* */
 			const char *nptr;
 			const char *p;
@@ -4231,7 +4247,7 @@ replace_text(PG_FUNCTION_ARGS)
 
 	text_position_setup(src_text, from_sub_text, PG_GET_COLLATION(), &state);
 
-	found = text_position_next(&state);
+	found = text_position_next(&state, NULL);
 
 	/* When the from_sub_text is not found, there is nothing to do. */
 	if (!found)
@@ -4256,7 +4272,7 @@ replace_text(PG_FUNCTION_ARGS)
 
 		start_ptr = curr_ptr + from_sub_text_len;
 
-		found = text_position_next(&state);
+		found = text_position_next(&state, NULL);
 		if (found)
 			curr_ptr = text_position_get_match_ptr(&state);
 	}
@@ -4594,7 +4610,7 @@ split_text(PG_FUNCTION_ARGS)
 
 	/* identify bounds of first field */
 	start_ptr = VARDATA_ANY(inputstring);
-	found = text_position_next(&state);
+	found = text_position_next(&state, NULL);
 
 	/* special case if fldsep not found at all */
 	if (!found)
@@ -4612,7 +4628,7 @@ split_text(PG_FUNCTION_ARGS)
 	{
 		/* identify bounds of next field */
 		start_ptr = end_ptr + fldsep_len;
-		found = text_position_next(&state);
+		found = text_position_next(&state, NULL);
 		if (found)
 			end_ptr = text_position_get_match_ptr(&state);
 	}
@@ -4766,7 +4782,7 @@ text_to_array_internal(PG_FUNCTION_ARGS)
 
 			CHECK_FOR_INTERRUPTS();
 
-			found = text_position_next(&state);
+			found = text_position_next(&state, NULL);
 			if (!found)
 			{
 				/* fetch last field */
diff --git a/src/include/access/tuptoaster.h b/src/include/access/tuptoaster.h
index f0aea24..e60815d 100644
--- a/src/include/access/tuptoaster.h
+++ b/src/include/access/tuptoaster.h
@@ -17,6 +17,98 @@
 #include "storage/lockdefs.h"
 #include "utils/relcache.h"
 
+#ifndef FRONTEND
+#include "access/genam.h"
+
+/*
+ * TOAST buffer is a producer consumer buffer.
+ *
+ *    +--+--+--+--+--+--+--+--+--+--+--+--+--+
+ *    |  |  |  |  |  |  |  |  |  |  |  |  |  |
+ *    +--+--+--+--+--+--+--+--+--+--+--+--+--+
+ *    ^           ^           ^              ^
+ *   buf      position      limit         capacity
+ *
+ * buf: point to the start of buffer.
+ * position: point to the next char to be consume.
+ * limit: point to the next char to be produce.
+ * capacity: point to the end of buffer.
+ *
+ * Constrains that need to be satisfied:
+ * buf <= position <= limit <= capacity
+ */
+typedef struct ToastBuffer
+{
+	const char	*buf;		/* start of the buffer */
+	const char	*position;	/* next char to be consumed */
+	char		*limit;		/* next char to be produced */
+	const char	*capacity;	/* end of the buffer */
+	int32		buf_size;	/* size passed to init_toast_buffer() */
+} ToastBuffer;
+
+
+typedef struct FetchDatumIteratorData
+{
+	ToastBuffer	*buf;		/* fetched chunk data is appended here */
+	Relation	toastrel;	/* toast relation opened for the scan */
+	Relation	*toastidxs;	/* indexes opened by toast_open_indexes() */
+	SysScanDesc	toastscan;	/* ordered index scan over the chunks */
+	ScanKeyData	toastkey;	/* scan key on va_valueid */
+	SnapshotData			SnapshotToast;	/* set up by init_toast_snapshot() */
+	struct varatt_external	toast_pointer;	/* copy of the datum's toast pointer */
+	int32		ressize;	/* va_extsize of the datum */
+	int32		nextidx;	/* chunk number expected next */
+	int32		numchunks;	/* chunk count derived from ressize */
+	int			num_indexes;	/* number of entries in toastidxs */
+	bool		done;		/* scan exhausted and closed? */
+}				FetchDatumIteratorData;
+
+typedef struct FetchDatumIteratorData *FetchDatumIterator;
+
+typedef struct DetoastIteratorData
+{
+	ToastBuffer 		*buf;	/* detoasted output read by the caller */
+	FetchDatumIterator	fetch_datum_iterator;	/* NULL for inline compressed datums */
+	ToastBuffer			*source;	/* inline compressed input, else NULL */
+	unsigned char		ctrl;	/* pglz control byte carried across calls */
+	int					ctrlc;	/* items of ctrl already processed; 0 = none saved */
+	bool				compressed;		/* toast value is compressed? */
+	bool				done;			/* iterator exhausted? */
+}			DetoastIteratorData;
+
+typedef struct DetoastIteratorData *DetoastIterator;
+
+/* ----------
+ * create_detoast_iterator -
+ *
+ * Initialize detoast iterator.
+ * ----------
+ */
+extern DetoastIterator create_detoast_iterator(struct varlena *attr);
+
+/* ----------
+ * free_detoast_iterator -
+ *
+ * Free the memory space occupied by the de-Toast iterator.
+ * ----------
+ */
+extern bool free_detoast_iterator(DetoastIterator iter);
+
+/* ----------
+ * detoast_iterate -
+ *
+ * Iterate through the toasted value referenced by iterator.
+ *
+ * As long as there is another slice in compression or external storage,
+ * detoast it into toast buffer in iterator, and return available slice length.
+ * Return -1 when no more data.
+ * ----------
+ */
+extern int32 detoast_iterate(int32 length, DetoastIterator iter);
+
+#endif
+
+
 /*
  * This enables de-toasting of index entries.  Needed until VACUUM is
  * smart enough to rebuild indexes from scratch.
-- 
2.7.4

Reply via email to