diff --git a/src/backend/access/gin/ginget.c b/src/backend/access/gin/ginget.c
index 60f005c..87cd9ea 100644
--- a/src/backend/access/gin/ginget.c
+++ b/src/backend/access/gin/ginget.c
@@ -120,7 +120,7 @@ collectMatchBitmap(GinBtreeData *btree, GinBtreeStack *stack,
 	Form_pg_attribute attr;
 
 	/* Initialize empty bitmap result */
-	scanEntry->matchBitmap = tbm_create(work_mem * 1024L);
+	scanEntry->matchBitmap = tbm_create(work_mem * 1024L, NULL);
 
 	/* Null query cannot partial-match anything */
 	if (scanEntry->isPartialMatch &&
diff --git a/src/backend/executor/nodeBitmapIndexscan.c b/src/backend/executor/nodeBitmapIndexscan.c
index 4274e9a..94bb289 100644
--- a/src/backend/executor/nodeBitmapIndexscan.c
+++ b/src/backend/executor/nodeBitmapIndexscan.c
@@ -78,7 +78,7 @@ MultiExecBitmapIndexScan(BitmapIndexScanState *node)
 	else
 	{
 		/* XXX should we use less than work_mem for this? */
-		tbm = tbm_create(work_mem * 1024L);
+		tbm = tbm_create(work_mem * 1024L, NULL);
 	}
 
 	/*
diff --git a/src/backend/executor/nodeBitmapOr.c b/src/backend/executor/nodeBitmapOr.c
index 3c6155b..1d280be 100644
--- a/src/backend/executor/nodeBitmapOr.c
+++ b/src/backend/executor/nodeBitmapOr.c
@@ -129,7 +129,7 @@ MultiExecBitmapOr(BitmapOrState *node)
 			if (result == NULL) /* first subplan */
 			{
 				/* XXX should we use less than work_mem for this? */
-				result = tbm_create(work_mem * 1024L);
+				result = tbm_create(work_mem * 1024L, NULL);
 			}
 
 			((BitmapIndexScanState *) subnode)->biss_result = result;
diff --git a/src/backend/nodes/tidbitmap.c b/src/backend/nodes/tidbitmap.c
index 0885812..81b742b 100644
--- a/src/backend/nodes/tidbitmap.c
+++ b/src/backend/nodes/tidbitmap.c
@@ -43,6 +43,9 @@
 #include "access/htup_details.h"
 #include "nodes/bitmapset.h"
 #include "nodes/tidbitmap.h"
+#include "storage/lwlock.h"
+#include "utils/dsa.h"
+#include "utils/relptr.h"
 
 /*
  * The maximum number of tuples per page is not large (typically 256 with
@@ -102,6 +105,9 @@ typedef struct PagetableEntry
 	bitmapword	words[Max(WORDS_PER_PAGE, WORDS_PER_CHUNK)];
 } PagetableEntry;
 
+/* Relative pointer of PagetableEntry to share across workers. */
+relptr_declare(PagetableEntry, RelptrPagetableEntry);
+
 /*
  * We want to avoid the overhead of creating the hashtable, which is
  * comparatively large, when not necessary. Particularly when we are using a
@@ -139,6 +145,13 @@ struct TIDBitmap
 	/* these are valid when iterating is true: */
 	PagetableEntry **spages;	/* sorted exact-page list, or NULL */
 	PagetableEntry **schunks;	/* sorted lossy-chunk list, or NULL */
+	dsa_pointer dsa_data;		/* dsa_pointer to the element array */
+	dsa_area   *dsa;			/* reference to per-query dsa area */
+	char	   *base;			/* pointer to the element array */
+	dsa_pointer dsapages;		/* dsa_pointer to the page array */
+	dsa_pointer dsachunks;		/* dsa_pointer to the chunk array */
+	RelptrPagetableEntry *relpages;		/* page array of relptr */
+	RelptrPagetableEntry *relchunks;	/* chunk array of relptr */
 };
 
 /*
@@ -156,6 +169,45 @@ struct TBMIterator
 	TBMIterateResult output;	/* MUST BE LAST (because variable-size) */
 };
 
+/* Shared TBMInfo to share across multiple workers */
+typedef struct TBMSharedInfo
+{
+	dsa_pointer dsa_data;		/* dsa pointers to head of pagetable data */
+	dsa_pointer spages;			/* dsa pointer to page array */
+	dsa_pointer schunks;		/* dsa pointer to chunk array */
+	PagetableEntry entry1;		/* used when status == TBM_ONE_PAGE */
+	TBMStatus	status;			/* status of TIDBitmap */
+	int			nentries;		/* number of entries in pagetable */
+	int			maxentries;		/* limit on same to meet maxbytes */
+	int			npages;			/* number of exact entries in pagetable */
+	int			nchunks;		/* number of lossy entries in pagetable */
+}	TBMSharedInfo;
+
+/*
+ * This stores the shared members of TBMSharedIterator so that multiple
+ * workers can operate on the same state. It also stores the TBMSharedInfo,
+ * in order to share relptrs of the chunk and the pages arrays and other
+ * TBM related information with other workers.
+ */
+typedef struct TBMSharedIteratorState
+{
+	int			spageptr;		/* next spages index */
+	int			schunkptr;		/* next schunks index */
+	int			schunkbit;		/* next bit to check in current schunk */
+	LWLock		lock;			/* lock to protect above members */
+	TBMSharedInfo tbminfo;		/* TBM info to shared across workers */
+}	TBMSharedIteratorState;
+
+/*
+ * Same as TBMIterator except that it holds a reference to the shared
+ * memory state so that multiple workers can operate on the same state.
+ */
+struct TBMSharedIterator
+{
+	TIDBitmap  *tbm;			/* TIDBitmap we're iterating over */
+	TBMSharedIteratorState *state;
+	TBMIterateResult output;
+};
 
 /* Local function prototypes */
 static void tbm_union_page(TIDBitmap *a, const PagetableEntry *bpage);
@@ -168,6 +220,8 @@ static bool tbm_page_is_lossy(const TIDBitmap *tbm, BlockNumber pageno);
 static void tbm_mark_page_lossy(TIDBitmap *tbm, BlockNumber pageno);
 static void tbm_lossify(TIDBitmap *tbm);
 static int	tbm_comparator(const void *left, const void *right);
+static int tbm_shared_comparator(const void *left, const void *right,
+					  void *arg);
 
 /*
  * Simple inline murmur hash implementation for the exact width required, for
@@ -187,6 +241,7 @@ hash_blockno(BlockNumber b)
 }
 
 /* define hashtable mapping block numbers to PagetableEntry's */
+#define SH_USE_NONDEFAULT_ALLOCATOR
 #define SH_PREFIX pagetable
 #define SH_ELEMENT_TYPE PagetableEntry
 #define SH_KEY_TYPE BlockNumber
@@ -204,10 +259,12 @@ hash_blockno(BlockNumber b)
  *
  * The bitmap will live in the memory context that is CurrentMemoryContext
  * at the time of this call.  It will be limited to (approximately) maxbytes
- * total memory consumption.
+ * total memory consumption.  If dsa passed to this function is not NULL
+ * then the memory for storing elements of the underlying page table will
+ * be allocated from the DSA.
  */
 TIDBitmap *
-tbm_create(long maxbytes)
+tbm_create(long maxbytes, dsa_area *dsa)
 {
 	TIDBitmap  *tbm;
 	long		nbuckets;
@@ -230,6 +287,7 @@ tbm_create(long maxbytes)
 	nbuckets = Max(nbuckets, 16);		/* sanity limit */
 	tbm->maxentries = (int) nbuckets;
 	tbm->lossify_start = 0;
+	tbm->dsa = dsa;
 
 	return tbm;
 }
@@ -244,7 +302,7 @@ tbm_create_pagetable(TIDBitmap *tbm)
 	Assert(tbm->status != TBM_HASH);
 	Assert(tbm->pagetable == NULL);
 
-	tbm->pagetable = pagetable_create(tbm->mcxt, 128, NULL);
+	tbm->pagetable = pagetable_create(tbm->mcxt, 128, tbm);
 
 	/* If entry1 is valid, push it into the hashtable */
 	if (tbm->status == TBM_ONE_PAGE)
@@ -777,6 +835,129 @@ tbm_iterate(TBMIterator *iterator)
 }
 
 /*
+ *	tbm_shared_iterate - scan through next page of a TIDBitmap
+ *
+ *	As above, but this will iterate using shared iterator which is shared
+ *	across multiple workers.  We need to acquire the iterator LWLock, before
+ *	accessing the shared members.
+ */
+TBMIterateResult *
+tbm_shared_iterate(TBMSharedIterator *iterator)
+{
+	TIDBitmap  *tbm = iterator->tbm;
+	TBMIterateResult *output = &iterator->output;
+	TBMSharedIteratorState *state = iterator->state;
+
+	Assert(tbm->iterating);
+
+	/* Acquire the LWLock before accessing the shared members */
+	LWLockAcquire(&state->lock, LW_EXCLUSIVE);
+
+	/*
+	 * If lossy chunk pages remain, make sure we've advanced schunkptr/
+	 * schunkbit to the next set bit.
+	 */
+	while (state->schunkptr < tbm->nchunks)
+	{
+		PagetableEntry *chunk = relptr_access(tbm->base,
+										   tbm->relchunks[state->schunkptr]);
+		int			schunkbit = state->schunkbit;
+
+		while (schunkbit < PAGES_PER_CHUNK)
+		{
+			int			wordnum = WORDNUM(schunkbit);
+			int			bitnum = BITNUM(schunkbit);
+
+			if ((chunk->words[wordnum] & ((bitmapword) 1 << bitnum)) != 0)
+				break;
+			schunkbit++;
+		}
+		if (schunkbit < PAGES_PER_CHUNK)
+		{
+			state->schunkbit = schunkbit;
+			break;
+		}
+		/* advance to next chunk */
+		state->schunkptr++;
+		state->schunkbit = 0;
+	}
+
+	/*
+	 * If both chunk and per-page data remain, must output the numerically
+	 * earlier page.
+	 */
+	if (state->schunkptr < tbm->nchunks)
+	{
+		PagetableEntry *chunk;
+		PagetableEntry *page = NULL;
+		BlockNumber chunk_blockno;
+
+		chunk = relptr_access(tbm->base, tbm->relchunks[state->schunkptr]);
+		/* don't touch relpages[] unless spageptr is within bounds */
+		if (state->spageptr < tbm->npages)
+			page = relptr_access(tbm->base, tbm->relpages[state->spageptr]);
+		chunk_blockno = chunk->blockno + state->schunkbit;
+		if (page == NULL || chunk_blockno < page->blockno)
+		{
+			/* Return a lossy page indicator from the chunk */
+			output->blockno = chunk_blockno;
+			output->ntuples = -1;
+			output->recheck = true;
+			state->schunkbit++;
+
+			LWLockRelease(&state->lock);
+			return output;
+		}
+	}
+
+	if (state->spageptr < tbm->npages)
+	{
+		PagetableEntry *page;
+		int			ntuples;
+		int			wordnum;
+
+		/* In ONE_PAGE state, we don't allocate an spages[] array */
+		if (tbm->status == TBM_ONE_PAGE)
+			page = &tbm->entry1;
+		else
+			page = relptr_access(tbm->base, tbm->relpages[state->spageptr]);
+
+		/* scan bitmap to extract individual offset numbers */
+		ntuples = 0;
+		for (wordnum = 0; wordnum < WORDS_PER_PAGE; wordnum++)
+		{
+			bitmapword	w = page->words[wordnum];
+
+			if (w != 0)
+			{
+				int			off = wordnum * BITS_PER_BITMAPWORD + 1;
+
+				while (w != 0)
+				{
+					if (w & 1)
+						output->offsets[ntuples++] = (OffsetNumber) off;
+					off++;
+					w >>= 1;
+				}
+			}
+		}
+
+		output->blockno = page->blockno;
+		output->ntuples = ntuples;
+		output->recheck = page->recheck;
+		state->spageptr++;
+
+		LWLockRelease(&state->lock);
+
+		return output;
+	}
+
+	LWLockRelease(&state->lock);
+
+	/* Nothing more in the bitmap */
+	return NULL;
+}
+
+/*
  * tbm_end_iterate - finish an iteration over a TIDBitmap
  *
  * Currently this is just a pfree, but it might do more someday.  (For
@@ -790,6 +971,17 @@ tbm_end_iterate(TBMIterator *iterator)
 }
 
 /*
+ * tbm_end_shared_iterate - finish an iteration over a TIDBitmap
+ *
+ * As above, but it frees the memory of TBMSharedIterator.
+ */
+void
+tbm_end_shared_iterate(TBMSharedIterator *iterator)
+{
+	pfree(iterator);
+}
+
+/*
  * tbm_find_pageentry - find a PagetableEntry for the pageno
  *
  * Returns NULL if there is no non-lossy entry for the pageno.
@@ -1061,3 +1253,246 @@ tbm_comparator(const void *left, const void *right)
 		return 1;
 	return 0;
 }
+
+/*
+ * As above, but this will get relptrs (relative pointer) of PagetableEntry
+ * and it needs to convert it to actual PagetableEntry before comparing the
+ * blockno.
+ */
+static int
+tbm_shared_comparator(const void *left, const void *right, void *arg)
+{
+	PagetableEntry *lpage;
+	PagetableEntry *rpage;
+
+	lpage = relptr_access((char *) arg, *((RelptrPagetableEntry *) left));
+	rpage = relptr_access((char *) arg, *((RelptrPagetableEntry *) right));
+
+	if (lpage->blockno < rpage->blockno)
+		return -1;
+	else if (lpage->blockno > rpage->blockno)
+		return 1;
+	return 0;
+}
+
+/*
+ * tbm_prepare_shared_iterate - prepare to iterate through a TIDBitmap
+ * by multiple workers using a shared iterator.
+ *
+ * The TBMSharedIteratorState will be allocated from DSA so that multiple
+ * workers can attach to it and iterate jointly.
+ *
+ * This will convert the pagetable hash into page and chunk arrays of
+ * relptr (relative pointer) so that these arrays can be shared across
+ * multiple workers.
+ */
+dsa_pointer
+tbm_prepare_shared_iterate(TIDBitmap *tbm)
+{
+	dsa_pointer iterator;
+	TBMSharedIteratorState *iterator_state;
+	TBMSharedInfo *tbminfo;
+
+	/*
+	 * Allocate the TBMSharedIteratorState from the per-query DSA so that
+	 * multiple workers can attach to it and iterate jointly.
+	 */
+	iterator = dsa_allocate(tbm->dsa, sizeof(TBMSharedIteratorState));
+	iterator_state = dsa_get_address(tbm->dsa, iterator);
+	tbminfo = &iterator_state->tbminfo;
+
+	/*
+	 * If we have a hashtable, create and fill the sorted page lists, unless
+	 * we already did that for a previous iterator.  Note that the lists are
+	 * attached to the bitmap not the iterator, so they can be used by more
+	 * than one iterator.  However, we keep dsa_pointer to these in the shared
+	 * iterator so that other workers can have access to these and store in
+	 * their local TBM.
+	 */
+	if (tbm->status == TBM_HASH && !tbm->iterating)
+	{
+		pagetable_iterator i;
+		PagetableEntry *page;
+		int			npages;
+		int			nchunks;
+
+		/*
+		 * Create page list and chunk list using relptr so that we can share
+		 * this information across multiple workers.
+		 */
+		if (tbm->npages)
+		{
+			tbm->dsapages = dsa_allocate(tbm->dsa,
+							   tbm->npages * (sizeof(RelptrPagetableEntry)));
+			tbm->relpages = dsa_get_address(tbm->dsa, tbm->dsapages);
+		}
+		if (tbm->nchunks)
+		{
+			tbm->dsachunks = dsa_allocate(tbm->dsa,
+							  tbm->nchunks * (sizeof(RelptrPagetableEntry)));
+			tbm->relchunks = dsa_get_address(tbm->dsa, tbm->dsachunks);
+		}
+
+		tbm->base = dsa_get_address(tbm->dsa, tbm->dsa_data);
+
+		npages = nchunks = 0;
+		pagetable_start_iterate(tbm->pagetable, &i);
+		while ((page = pagetable_iterate(tbm->pagetable, &i)) != NULL)
+		{
+			if (page->ischunk)
+				relptr_store(tbm->base, tbm->relchunks[nchunks++], page);
+			else
+				relptr_store(tbm->base, tbm->relpages[npages++], page);
+		}
+
+		Assert(npages == tbm->npages);
+		Assert(nchunks == tbm->nchunks);
+		if (npages > 1)
+			qsort_arg(tbm->relpages, npages, sizeof(RelptrPagetableEntry),
+					  tbm_shared_comparator, (void *) tbm->base);
+		if (nchunks > 1)
+			qsort_arg(tbm->relchunks, nchunks, sizeof(RelptrPagetableEntry),
+					  tbm_shared_comparator, (void *) tbm->base);
+	}
+
+	/*
+	 * Store the TBM members in the shared state so that the other workers
+	 * can read them while attaching to the shared iterator.
+	 */
+	tbminfo->maxentries = tbm->maxentries;
+	tbminfo->nchunks = tbm->nchunks;
+	tbminfo->nentries = tbm->nentries;
+	tbminfo->npages = tbm->npages;
+	tbminfo->dsa_data = tbm->dsa_data;
+	tbminfo->spages = tbm->dsapages;
+	tbminfo->schunks = tbm->dsachunks;
+	tbminfo->status = tbm->status;
+
+	if (tbm->status == TBM_ONE_PAGE)
+		memcpy(&tbminfo->entry1, &tbm->entry1, sizeof(PagetableEntry));
+
+	/* Initialize the shared iterator state. */
+	iterator_state->schunkbit = 0;
+	iterator_state->schunkptr = 0;
+	iterator_state->spageptr = 0;
+
+
+	LWLockInitialize(&iterator_state->lock, LWTRANCHE_PARALLEL_TBM_ITERATOR);
+
+	tbm->iterating = true;
+
+	return iterator;
+}
+
+/*
+ *	tbm_attach_shared_iterate
+ *
+ *	Allocate an iterator and attach shared iterator state to it so that
+ *	multiple workers can access the same memory. We also need to copy
+ *	some of the TBM related information from the shared state to the TBM
+ *	because workers (other than the leader) create a local TBM and thus
+ *	must get this information from the shared location.
+ */
+TBMSharedIterator *
+tbm_attach_shared_iterate(TIDBitmap *tbm, dsa_area *dsa, dsa_pointer iterator)
+{
+	TBMSharedIterator *shared_iterator;
+	TBMSharedInfo *tbminfo;
+
+	shared_iterator = (TBMSharedIterator *) palloc(sizeof(TBMSharedIterator) +
+								 MAX_TUPLES_PER_PAGE * sizeof(OffsetNumber));
+
+	shared_iterator->tbm = tbm;
+	shared_iterator->state =
+		(TBMSharedIteratorState *) dsa_get_address(dsa, iterator);
+	tbminfo = &shared_iterator->state->tbminfo;
+
+	/*
+	 * If we have come here from a worker other than the leader then we need
+	 * to get TBM related information from the shared iterator state. This
+	 * needs to be done only once per worker.
+	 */
+	if (!tbm->iterating)
+	{
+		tbm->status = tbminfo->status;
+		tbm->nchunks = tbminfo->nchunks;
+		tbm->nentries = tbminfo->nentries;
+		tbm->npages = tbminfo->npages;
+		tbm->maxentries = tbminfo->maxentries;
+		if (tbm->status == TBM_HASH)
+		{
+			tbm->dsa_data = tbminfo->dsa_data;
+			tbm->base = dsa_get_address(dsa, tbm->dsa_data);
+
+			/* Convert dsa pointer to the local pointer */
+			if (tbm->npages)
+				tbm->relpages = dsa_get_address(dsa, tbminfo->spages);
+
+			if (tbm->nchunks)
+				tbm->relchunks = dsa_get_address(dsa, tbminfo->schunks);
+		}
+		else
+			memcpy(&tbm->entry1, &tbminfo->entry1, sizeof(PagetableEntry));
+
+		tbm->iterating = true;
+	}
+
+	return shared_iterator;
+}
+
+/*
+ * pagetable_allocate
+ *
+ * Callback function for allocating the memory for hashtable elements.
+ * It allocates memory from DSA if tbm holds a reference to a dsa.
+ */
+static inline void *
+pagetable_allocate(pagetable_hash *pagetable, Size size)
+{
+	TIDBitmap  *tbm = (TIDBitmap *) pagetable->private_data;
+	dsa_pointer dsaptr;
+	char	   *ptr;
+
+	if (tbm->dsa == NULL)
+		return MemoryContextAllocExtended(pagetable->ctx, size,
+										  MCXT_ALLOC_HUGE | MCXT_ALLOC_ZERO);
+
+	/* Add the size for storing dsa_pointer */
+	dsaptr = dsa_allocate(tbm->dsa, size + sizeof(dsa_pointer));
+
+	tbm->dsa_data = dsaptr;
+
+	ptr = dsa_get_address(tbm->dsa, dsaptr);
+	memset(ptr, 0, size + sizeof(dsa_pointer));
+
+	/* Store dsa_pointer */
+	*((dsa_pointer *) ptr) = dsaptr;
+
+	return (ptr + sizeof(dsa_pointer));
+}
+
+/*
+ * pagetable_free
+ *
+ * Callback function for freeing hash table elements.
+ */
+static inline void
+pagetable_free(pagetable_hash *pagetable, void *pointer)
+{
+	TIDBitmap  *tbm = (TIDBitmap *) pagetable->private_data;
+	dsa_pointer dsa_data;
+
+	if (tbm->dsa == NULL)
+	{
+		pfree(pointer);
+		return;
+	}
+	/*
+	 * During tbm_free after iteration we are already detached from the DSA
+	 * (the Gather node has been shut down), so do not touch it.
+	 */
+	if (tbm->iterating)
+		return;
+	dsa_data = *((dsa_pointer *) ((char *) pointer - sizeof(dsa_pointer)));
+	dsa_free(tbm->dsa, dsa_data);
+}
diff --git a/src/include/nodes/tidbitmap.h b/src/include/nodes/tidbitmap.h
index 14992e0..3889a42 100644
--- a/src/include/nodes/tidbitmap.h
+++ b/src/include/nodes/tidbitmap.h
@@ -24,7 +24,6 @@
 
 #include "storage/itemptr.h"
 
-
 /*
  * Actual bitmap representation is private to tidbitmap.c.  Callers can
  * do IsA(x, TIDBitmap) on it, but nothing else.
@@ -33,6 +32,7 @@ typedef struct TIDBitmap TIDBitmap;
 
 /* Likewise, TBMIterator is private */
 typedef struct TBMIterator TBMIterator;
+typedef struct TBMSharedIterator TBMSharedIterator;
 
 /* Result structure for tbm_iterate */
 typedef struct
@@ -46,7 +46,7 @@ typedef struct
 
 /* function prototypes in nodes/tidbitmap.c */
 
-extern TIDBitmap *tbm_create(long maxbytes);
+extern TIDBitmap *tbm_create(long maxbytes, dsa_area *dsa);
 extern void tbm_free(TIDBitmap *tbm);
 
 extern void tbm_add_tuples(TIDBitmap *tbm,
@@ -62,5 +62,10 @@ extern bool tbm_is_empty(const TIDBitmap *tbm);
 extern TBMIterator *tbm_begin_iterate(TIDBitmap *tbm);
 extern TBMIterateResult *tbm_iterate(TBMIterator *iterator);
 extern void tbm_end_iterate(TBMIterator *iterator);
+extern void tbm_end_shared_iterate(TBMSharedIterator *iterator);
+extern dsa_pointer tbm_prepare_shared_iterate(TIDBitmap *tbm);
+extern TBMSharedIterator *tbm_attach_shared_iterate(TIDBitmap *tbm,
+						  dsa_area *dsa, dsa_pointer iterator);
+extern TBMIterateResult *tbm_shared_iterate(TBMSharedIterator *iterator);
 
 #endif   /* TIDBITMAP_H */
diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h
index 8bd93c3..4d392e0 100644
--- a/src/include/storage/lwlock.h
+++ b/src/include/storage/lwlock.h
@@ -212,6 +212,7 @@ typedef enum BuiltinTrancheIds
 	LWTRANCHE_LOCK_MANAGER,
 	LWTRANCHE_PREDICATE_LOCK_MANAGER,
 	LWTRANCHE_PARALLEL_QUERY_DSA,
+	LWTRANCHE_PARALLEL_TBM_ITERATOR,
 	LWTRANCHE_FIRST_USER_DEFINED
 }	BuiltinTrancheIds;
 
