>From 7d6ad6345b97d6b9588dcdab39fab614c8cb6ece Mon Sep 17 00:00:00 2001
From: David Zhang <david.zhang@highgo.ca>
Date: Thu, 17 Nov 2022 12:22:16 -0800
Subject: [PATCH 2/4] support global unique index create

---
 src/backend/access/nbtree/nbtsort.c      | 364 ++++++++++++++++++++++-
 src/backend/commands/indexcmds.c         | 158 ++++++++++
 src/backend/storage/file/sharedfileset.c |  10 +
 src/backend/utils/sort/tuplesort.c       |  55 +++-
 src/include/commands/defrem.h            |   2 +
 src/include/nodes/execnodes.h            |   5 +
 src/include/nodes/parsenodes.h           |   4 +
 src/include/storage/sharedfileset.h      |   1 +
 src/include/utils/tuplesort.h            |  10 +
 src/test/regress/expected/indexing.out   |  12 +
 src/test/regress/sql/indexing.sql        |  12 +
 11 files changed, 622 insertions(+), 11 deletions(-)

diff --git a/src/backend/access/nbtree/nbtsort.c b/src/backend/access/nbtree/nbtsort.c
index 501e011ce1..deb399e36a 100644
--- a/src/backend/access/nbtree/nbtsort.c
+++ b/src/backend/access/nbtree/nbtsort.c
@@ -71,6 +71,7 @@
 #define PARALLEL_KEY_QUERY_TEXT			UINT64CONST(0xA000000000000004)
 #define PARALLEL_KEY_WAL_USAGE			UINT64CONST(0xA000000000000005)
 #define PARALLEL_KEY_BUFFER_USAGE		UINT64CONST(0xA000000000000006)
+#define PARALLEL_KEY_TUPLESORT_GLOBAL	UINT64CONST(0xA000000000000007)
 
 /*
  * DISABLE_LEADER_PARTICIPATION disables the leader's participation in
@@ -110,6 +111,7 @@ typedef struct BTShared
 	bool		nulls_not_distinct;
 	bool		isconcurrent;
 	int			scantuplesortstates;
+	bool		isglobal;
 
 	/*
 	 * workersdonecv is used to monitor the progress of workers.  All parallel
@@ -197,6 +199,7 @@ typedef struct BTLeader
 	Snapshot	snapshot;
 	WalUsage   *walusage;
 	BufferUsage *bufferusage;
+	Sharedsort *sharedsortglobal;
 } BTLeader;
 
 /*
@@ -226,6 +229,16 @@ typedef struct BTBuildState
 	 * BTBuildState.  Workers have their own spool and spool2, though.)
 	 */
 	BTLeader   *btleader;
+
+	/*
+	 * global unique index related parameters
+	 */
+	BTSpool    *spoolglobal;	/* spoolglobal is used on global unique index
+								 * build in parallel */
+	bool		global_index;	/* true if index is global */
+	int			globalIndexPart;	/* partition number indication */
+	int			nparts;			/* number of partitions involved in global
+								 * unique index build in parallel */
 } BTBuildState;
 
 /*
@@ -291,8 +304,29 @@ static void _bt_leader_participate_as_worker(BTBuildState *buildstate);
 static void _bt_parallel_scan_and_sort(BTSpool *btspool, BTSpool *btspool2,
 									   BTShared *btshared, Sharedsort *sharedsort,
 									   Sharedsort *sharedsort2, int sortmem,
-									   bool progress);
+									   bool progress, Sharedsort *sharedsortglobal,
+									   bool isworker);
 
+static BTSpool *global_btspool;
+Sharedsort *global_sharedsort;
+
+static void
+btinit_global_spool(Relation heap, Relation index, BTBuildState *buildstate)
+{
+	elog(DEBUG2, "%s: init global index spool", __FUNCTION__);
+	global_btspool = (BTSpool *) palloc0(sizeof(BTSpool));
+	global_btspool->heap = heap;
+	global_btspool->index = index;
+	global_btspool->isunique = buildstate->isunique;
+
+	global_btspool->sortstate =
+		tuplesort_begin_index_btree(heap, index, buildstate->isunique,
+									buildstate->nulls_not_distinct,
+									maintenance_work_mem, NULL,
+									false);
+
+	tuplesort_mark_global_sort(global_btspool->sortstate);
+}
 
 /*
  *	btbuild() -- build a new btree index.
@@ -318,22 +352,149 @@ btbuild(Relation heap, Relation index, IndexInfo *indexInfo)
 	buildstate.indtuples = 0;
 	buildstate.btleader = NULL;
 
+	if (indexInfo->ii_Global_index && indexInfo->ii_Unique)
+	{
+		/* copy global unique index related parameters to buildstate */
+		buildstate.global_index = indexInfo->ii_Global_index;
+		buildstate.globalIndexPart = indexInfo->ii_GlobalIndexPart;
+		buildstate.nparts = indexInfo->ii_Nparts;
+	}
+	else
+	{
+		/* disable global unique check */
+		buildstate.global_index = false;
+		buildstate.globalIndexPart = 0;
+		buildstate.nparts = 0;
+	}
+
 	/*
 	 * We expect to be called exactly once for any index relation. If that's
-	 * not the case, big trouble's what we have.
+	 * not the case, big trouble's what we have unless you set
+	 * buildGlobalSpool to true, which only builds the global spool for global
+	 * uniqueness check and not physical index tuples
 	 */
-	if (RelationGetNumberOfBlocks(index) != 0)
+	if (indexInfo->ii_BuildGlobalSpool == false && RelationGetNumberOfBlocks(index) != 0)
 		elog(ERROR, "index \"%s\" already contains data",
 			 RelationGetRelationName(index));
 
 	reltuples = _bt_spools_heapscan(heap, index, &buildstate, indexInfo);
 
+	if (indexInfo->ii_ParallelWorkers > 0)
+	{
+		/*
+		 * global uniqueness check in parallel build case
+		 */
+		if (indexInfo->ii_Global_index && indexInfo->ii_Unique && global_btspool)
+		{
+			/*
+			 * indexInfo->ii_GlobalIndexPart <= 0 indicates the first and
+			 * intermediate partition to build index on. For parallel global
+			 * unique index build, we need to clean up global_btspool
+			 * structure to prevent resource leak
+			 */
+			if (indexInfo->ii_GlobalIndexPart <= 0)
+			{
+				_bt_spooldestroy(global_btspool);
+				global_btspool = NULL;
+			}
+
+			/*
+			 * indexInfo->ii_GlobalIndexPart > 0 indicates the last partition
+			 * to build index on. For parallel global unique index build, we
+			 * need to call tuplesort_performsort to merge all the tapes
+			 * created from previous partition build runs and do a final
+			 * sorting to determine global uniqueness
+			 */
+			if (indexInfo->ii_GlobalIndexPart > 0)
+			{
+				IndexTuple	itup;
+
+				elog(DEBUG2, "last partitioned to build global index in parallel. Perform merge run and "
+					 "uniqueness check now...");
+				tuplesort_performsort(buildstate.spoolglobal->sortstate);
+
+				/*
+				 * this loop checks for uniqueness after all tapes have been
+				 * merged. If a duplicate is found, we will error out.
+				 */
+				while ((itup = tuplesort_getindextuple(buildstate.spoolglobal->sortstate,
+													   true)) != NULL)
+				{
+					/*
+					 * simply checking for global uniqueness, nothing to do
+					 * here
+					 */
+				}
+
+				/*
+				 * no global uniqueness violation is found at this point,
+				 * remove all the tapes (temp files) and destroy resources and
+				 * continue to build the actual index.
+				 */
+				_bt_spooldestroy(buildstate.spoolglobal);
+				_bt_spooldestroy(global_btspool);
+				global_btspool = NULL;
+				if (global_sharedsort)
+				{
+					pfree(global_sharedsort);
+					global_sharedsort = NULL;
+				}
+			}
+		}
+	}
+	else
+	{
+		/*
+		 * global uniqueness check in serial build case
+		 */
+		if (indexInfo->ii_GlobalIndexPart > 0 && indexInfo->ii_Global_index &&
+			indexInfo->ii_Unique && global_btspool)
+		{
+			/*
+			 * indexInfo->ii_GlobalIndexPart > 0 indicates the last partition
+			 * to build index on. For serial global unique index build, we
+			 * call tuplesort_performsort on global_btspool->sortstate which
+			 * should contain index tuples from all partitions. If a duplicate
+			 * is found, we will error out.
+			 */
+			elog(DEBUG2, "last partitioned to build global index serially. Sorting global_btspool for "
+				 "uniqueness check now...");
+			tuplesort_performsort(global_btspool->sortstate);
+
+			/*
+			 * no global uniqueness violation is found at this point, destroy
+			 * global_btspool structure and continue to build the actual
+			 * index.
+			 */
+			_bt_spooldestroy(global_btspool);
+			global_btspool = NULL;
+		}
+	}
+
 	/*
-	 * Finish the build by (1) completing the sort of the spool file, (2)
-	 * inserting the sorted tuples into btree pages and (3) building the upper
-	 * levels.  Finally, it may also be necessary to end use of parallelism.
+	 * if indexInfo->ii_BuildGlobalSpool is set, we will not continue to build
+	 * the actual index. This is used during a new partition attach, where we
+	 * just want to populate the global_btspool from current partitions
+	 * without building the actual indexes (because they exist already). Then,
+	 * we take that global_btspool and sort it with the tuples in newly
+	 * attached partition to determine if attach would violate global
+	 * uniqueness check
 	 */
-	_bt_leafbuild(buildstate.spool, buildstate.spool2);
+	if (indexInfo->ii_BuildGlobalSpool)
+	{
+		elog(DEBUG2, "ii_BuildGlobalSpool is set. Skip building actual index content");
+	}
+	else
+	{
+		/*
+		 * Finish the build by (1) completing the sort of the spool file, (2)
+		 * inserting the sorted tuples into btree pages and (3) building the
+		 * upper levels.  Finally, it may also be necessary to end use of
+		 * parallelism.
+		 */
+		_bt_leafbuild(buildstate.spool, buildstate.spool2);
+	}
+
 	_bt_spooldestroy(buildstate.spool);
 	if (buildstate.spool2)
 		_bt_spooldestroy(buildstate.spool2);
@@ -374,6 +535,7 @@ _bt_spools_heapscan(Relation heap, Relation index, BTBuildState *buildstate,
 	BTSpool    *btspool = (BTSpool *) palloc0(sizeof(BTSpool));
 	SortCoordinate coordinate = NULL;
 	double		reltuples = 0;
+	SortCoordinate coordinateglobal = NULL;
 
 	/*
 	 * We size the sort area as maintenance_work_mem rather than work_mem to
@@ -395,8 +557,40 @@ _bt_spools_heapscan(Relation heap, Relation index, BTBuildState *buildstate,
 
 	/* Attempt to launch parallel worker scan when required */
 	if (indexInfo->ii_ParallelWorkers > 0)
+	{
+		/*
+		 * while building the last partition table in parallel global unique
+		 * index build, we need to allocate a primary spoolglobal, which will
+		 * be used in the leader process later to "take over" all tapes
+		 * created by previous partition runs
+		 */
+		if (buildstate->isunique && buildstate->global_index &&
+			buildstate->globalIndexPart > 0)
+		{
+			elog(DEBUG2, "init primary spoolglobal in last partition for parallel index build");
+			buildstate->spoolglobal = (BTSpool *) palloc0(sizeof(BTSpool));
+			buildstate->spoolglobal->heap = heap;
+			buildstate->spoolglobal->index = index;
+			buildstate->spoolglobal->isunique = indexInfo->ii_Unique;
+		}
 		_bt_begin_parallel(buildstate, indexInfo->ii_Concurrent,
 						   indexInfo->ii_ParallelWorkers);
+	}
+	else
+	{
+		/*
+		 * in serial global unique index build, we just need to allocate a
+		 * single global_btspool structure at the first partition build. The
+		 * rest of the partitions will add their index tuples to this single
+		 * global spool structure
+		 */
+		if (buildstate->isunique && buildstate->global_index &&
+			buildstate->globalIndexPart < 0)
+		{
+			elog(DEBUG2, "init new global_btspool for serial index build");
+			btinit_global_spool(heap, index, buildstate);
+		}
+	}
 
 	/*
 	 * If parallel build requested and at least one worker process was
@@ -409,6 +603,22 @@ _bt_spools_heapscan(Relation heap, Relation index, BTBuildState *buildstate,
 		coordinate->nParticipants =
 			buildstate->btleader->nparticipanttuplesorts;
 		coordinate->sharedsort = buildstate->btleader->sharedsort;
+
+		/*
+		 * set up a coordinator state for primary spoolglobal if we are doing
+		 * global unique index build in parallel. We do this at the last
+		 * partition create in parallel mode
+		 */
+		if (buildstate->isunique && buildstate->global_index && buildstate->globalIndexPart > 0)
+		{
+			elog(DEBUG2, "set up coordinate state for primary spoolglobal for "
+				 "parallel index build case");
+			coordinateglobal = (SortCoordinate) palloc0(sizeof(SortCoordinateData));
+			coordinateglobal->isWorker = false;
+			coordinateglobal->nParticipants =
+				tuplesort_get_curr_workers(buildstate->btleader->sharedsortglobal);
+			coordinateglobal->sharedsort = buildstate->btleader->sharedsortglobal;
+		}
 	}
 
 	/*
@@ -438,6 +648,23 @@ _bt_spools_heapscan(Relation heap, Relation index, BTBuildState *buildstate,
 									maintenance_work_mem, coordinate,
 									TUPLESORT_NONE);
 
+	/*
+	 * initialize primary spoolglobal if we are doing global unique index
+	 * build in parallel. We do this at the last partition create in parallel
+	 * mode
+	 */
+	if (buildstate->btleader && buildstate->isunique &&
+		buildstate->global_index && buildstate->globalIndexPart > 0)
+	{
+		elog(DEBUG2, "tuplesort_begin_index_btree for primary spoolglobal for "
+			 "parallel index build case");
+		buildstate->spoolglobal->sortstate =
+			tuplesort_begin_index_btree(heap, index, buildstate->isunique,
+										buildstate->nulls_not_distinct,
+										maintenance_work_mem, coordinateglobal,
+										false);
+	}
+
 	/*
 	 * If building a unique index, put dead tuples in a second spool to keep
 	 * them out of the uniqueness check.  We expect that the second spool (for
@@ -487,6 +714,19 @@ _bt_spools_heapscan(Relation heap, Relation index, BTBuildState *buildstate,
 		reltuples = _bt_parallel_heapscan(buildstate,
 										  &indexInfo->ii_BrokenHotChain);
 
+	/*
+	 * all parallel workers should finish at this point, make backup of
+	 * sharedsortglobal, which is needed to persist the logical tape and temp
+	 * file information for next partition build when building global unique
+	 * index in parallel
+	 */
+	if (buildstate->btleader && buildstate->global_index &&
+		buildstate->isunique)
+	{
+		elog(DEBUG2, "all workers finished, backup sharedsortglobal");
+		tuplesort_copy_sharedsort2(global_sharedsort, global_btspool->sortstate);
+	}
+
 	/*
 	 * Set the progress target for the next phase.  Reset the block number
 	 * values set by table_index_build_scan
@@ -599,7 +839,11 @@ _bt_build_callback(Relation index,
 	 * processing
 	 */
 	if (tupleIsAlive || buildstate->spool2 == NULL)
+	{
 		_bt_spool(buildstate->spool, tid, values, isnull);
+		if (buildstate->global_index && buildstate->isunique && global_btspool)
+			_bt_spool(global_btspool, tid, values, isnull);
+	}
 	else
 	{
 		/* dead tuples are put into spool2 */
@@ -1458,9 +1702,11 @@ _bt_begin_parallel(BTBuildState *buildstate, bool isconcurrent, int request)
 	Snapshot	snapshot;
 	Size		estbtshared;
 	Size		estsort;
+	Size		estsortglobal = 0;
 	BTShared   *btshared;
 	Sharedsort *sharedsort;
 	Sharedsort *sharedsort2;
+	Sharedsort *sharedsortglobal;
 	BTSpool    *btspool = buildstate->spool;
 	BTLeader   *btleader = (BTLeader *) palloc0(sizeof(BTLeader));
 	WalUsage   *walusage;
@@ -1504,6 +1750,13 @@ _bt_begin_parallel(BTBuildState *buildstate, bool isconcurrent, int request)
 	estsort = tuplesort_estimate_shared(scantuplesortstates);
 	shm_toc_estimate_chunk(&pcxt->estimator, estsort);
 
+	if (buildstate->isunique && buildstate->global_index)
+	{
+		/* global unique index case will estimate 1 more sharedsort struct */
+		estsortglobal = tuplesort_estimate_shared(scantuplesortstates * buildstate->nparts);
+		shm_toc_estimate_chunk(&pcxt->estimator, estsortglobal);
+	}
+
 	/*
 	 * Unique case requires a second spool, and so we may have to account for
 	 * another shared workspace for that -- PARALLEL_KEY_TUPLESORT_SPOOL2
@@ -1571,6 +1824,7 @@ _bt_begin_parallel(BTBuildState *buildstate, bool isconcurrent, int request)
 	btshared->havedead = false;
 	btshared->indtuples = 0.0;
 	btshared->brokenhotchain = false;
+	btshared->isglobal = buildstate->global_index;
 	table_parallelscan_initialize(btspool->heap,
 								  ParallelTableScanFromBTShared(btshared),
 								  snapshot);
@@ -1603,6 +1857,43 @@ _bt_begin_parallel(BTBuildState *buildstate, bool isconcurrent, int request)
 		shm_toc_insert(pcxt->toc, PARALLEL_KEY_TUPLESORT_SPOOL2, sharedsort2);
 	}
 
+	if (buildstate->isunique && buildstate->global_index)
+	{
+		/* global unique index case will allocate 1 more sharedsort struct */
+		sharedsortglobal = (Sharedsort *) shm_toc_allocate(pcxt->toc, estsortglobal);
+		if (!sharedsortglobal)
+			elog(ERROR, "failed to allocate shared memory space");
+
+		if (buildstate->globalIndexPart == -1)
+		{
+			elog(DEBUG2, "initialize and make a copy of sharedsortglobal for first time");
+			tuplesort_initialize_shared(sharedsortglobal, scantuplesortstates * buildstate->nparts, NULL);
+
+			/* save a copy of sharedsortglobal */
+			global_sharedsort = (Sharedsort *) palloc(estsortglobal);
+			tuplesort_copy_sharedsort(global_sharedsort, sharedsortglobal);
+		}
+		else if (buildstate->globalIndexPart == 0 || buildstate->globalIndexPart == 1)
+		{
+			elog(DEBUG2, "restore the copy of sharedsortglobal for subsequent processing");
+			tuplesort_copy_sharedsort(sharedsortglobal, global_sharedsort);
+
+			/* register for cleanup at the last partition index build */
+			if (buildstate->globalIndexPart == 1)
+			{
+				tuplesort_register_cleanup_callback(sharedsortglobal, pcxt->seg);
+			}
+		}
+		else
+		{
+			elog(ERROR, "invalid global index partition value %d", buildstate->globalIndexPart);
+		}
+
+		shm_toc_insert(pcxt->toc, PARALLEL_KEY_TUPLESORT_GLOBAL, sharedsortglobal);
+	}
+	else
+		sharedsortglobal = NULL;
+
 	/* Store query string for workers */
 	if (debug_query_string)
 	{
@@ -1636,6 +1927,7 @@ _bt_begin_parallel(BTBuildState *buildstate, bool isconcurrent, int request)
 	btleader->snapshot = snapshot;
 	btleader->walusage = walusage;
 	btleader->bufferusage = bufferusage;
+	btleader->sharedsortglobal = sharedsortglobal;
 
 	/* If no workers were successfully launched, back out (do serial build) */
 	if (pcxt->nworkers_launched == 0)
@@ -1780,7 +2072,8 @@ _bt_leader_participate_as_worker(BTBuildState *buildstate)
 	/* Perform work common to all participants */
 	_bt_parallel_scan_and_sort(leaderworker, leaderworker2, btleader->btshared,
 							   btleader->sharedsort, btleader->sharedsort2,
-							   sortmem, true);
+							   sortmem, true, btleader->sharedsortglobal,
+							   false);
 
 #ifdef BTREE_BUILD_STATS
 	if (log_btree_build_stats)
@@ -1803,6 +2096,7 @@ _bt_parallel_build_main(dsm_segment *seg, shm_toc *toc)
 	BTShared   *btshared;
 	Sharedsort *sharedsort;
 	Sharedsort *sharedsort2;
+	Sharedsort *sharedsortglobal;
 	Relation	heapRel;
 	Relation	indexRel;
 	LOCKMODE	heapLockmode;
@@ -1878,13 +2172,26 @@ _bt_parallel_build_main(dsm_segment *seg, shm_toc *toc)
 		tuplesort_attach_shared(sharedsort2, seg);
 	}
 
+	if (btshared->isunique && btshared->isglobal)
+	{
+		sharedsortglobal = shm_toc_lookup(toc, PARALLEL_KEY_TUPLESORT_GLOBAL, false);
+		tuplesort_attach_shared(sharedsortglobal, seg);
+		elog(DEBUG2, "worker %d processing global unique index", MyProcPid);
+	}
+	else
+	{
+		sharedsortglobal = NULL;
+		elog(DEBUG2, "worker %d processing regular index", MyProcPid);
+	}
+
 	/* Prepare to track buffer usage during parallel execution */
 	InstrStartParallelQuery();
 
 	/* Perform sorting of spool, and possibly a spool2 */
 	sortmem = maintenance_work_mem / btshared->scantuplesortstates;
 	_bt_parallel_scan_and_sort(btspool, btspool2, btshared, sharedsort,
-							   sharedsort2, sortmem, false);
+							   sharedsort2, sortmem, false,
+							   sharedsortglobal, true);
 
 	/* Report WAL/buffer usage during parallel execution */
 	bufferusage = shm_toc_lookup(toc, PARALLEL_KEY_BUFFER_USAGE, false);
@@ -1919,7 +2226,8 @@ _bt_parallel_build_main(dsm_segment *seg, shm_toc *toc)
 static void
 _bt_parallel_scan_and_sort(BTSpool *btspool, BTSpool *btspool2,
 						   BTShared *btshared, Sharedsort *sharedsort,
-						   Sharedsort *sharedsort2, int sortmem, bool progress)
+						   Sharedsort *sharedsort2, int sortmem, bool progress,
+						   Sharedsort *sharedsortglobal, bool isworker)
 {
 	SortCoordinate coordinate;
 	BTBuildState buildstate;
@@ -1965,6 +2273,28 @@ _bt_parallel_scan_and_sort(BTSpool *btspool, BTSpool *btspool2,
 										false);
 	}
 
+
+	/* global index */
+	if (sharedsortglobal)
+	{
+		SortCoordinate coordinate3;
+
+		global_btspool = (BTSpool *) palloc0(sizeof(BTSpool));
+		global_btspool->heap = btspool->heap;
+		global_btspool->index = btspool->index;
+		global_btspool->isunique = btspool->isunique;
+
+		coordinate3 = palloc0(sizeof(SortCoordinateData));
+		coordinate3->isWorker = true;
+		coordinate3->nParticipants = -1;
+		coordinate3->sharedsort = sharedsortglobal;
+		global_btspool->sortstate =
+			tuplesort_begin_index_btree(global_btspool->heap, global_btspool->index, global_btspool->isunique,
+										btspool->nulls_not_distinct, sortmem, coordinate3,
+										false);
+	}
+
+
 	/* Fill in buildstate for _bt_build_callback() */
 	buildstate.isunique = btshared->isunique;
 	buildstate.nulls_not_distinct = btshared->nulls_not_distinct;
@@ -1975,6 +2305,12 @@ _bt_parallel_scan_and_sort(BTSpool *btspool, BTSpool *btspool2,
 	buildstate.indtuples = 0;
 	buildstate.btleader = NULL;
 
+	if (btshared->isglobal && btshared->isunique)
+	{
+		/* fill global unique index related parameters in buildstate */
+		buildstate.global_index = btshared->isglobal;
+	}
+
 	/* Join parallel scan */
 	indexInfo = BuildIndexInfo(btspool->index);
 	indexInfo->ii_Concurrent = btshared->isconcurrent;
@@ -1997,6 +2333,11 @@ _bt_parallel_scan_and_sort(BTSpool *btspool, BTSpool *btspool2,
 		tuplesort_performsort(btspool2->sortstate);
 	}
 
+	if (global_btspool)
+	{
+		tuplesort_performsort(global_btspool->sortstate);
+	}
+
 	/*
 	 * Done.  Record ambuild statistics, and whether we encountered a broken
 	 * HOT chain.
@@ -2018,4 +2359,7 @@ _bt_parallel_scan_and_sort(BTSpool *btspool, BTSpool *btspool2,
 	tuplesort_end(btspool->sortstate);
 	if (btspool2)
 		tuplesort_end(btspool2->sortstate);
+
+	if (global_btspool && isworker)
+		tuplesort_end(global_btspool->sortstate);
 }
diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index 099bf68e77..51f6b46c99 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -1145,6 +1145,13 @@ DefineIndex(Oid relationId,
 		flags |= INDEX_CREATE_PARTITIONED;
 	if (stmt->primary)
 		flags |= INDEX_CREATE_IS_PRIMARY;
+	/* copy the partition indication -1 = first, 0 = N/A, 1 = last */
+	if (stmt->global_index)
+	{
+		indexInfo->ii_Global_index = stmt->global_index;
+		indexInfo->ii_GlobalIndexPart = stmt->globalIndexPart;
+		indexInfo->ii_Nparts = stmt->nparts;
+	}
 
 	/*
 	 * If the table is partitioned, and recursion was declined but partitions
@@ -1380,6 +1387,24 @@ DefineIndex(Oid relationId,
 					bool		found_whole_row;
 					ListCell   *lc;
 
+					if (i == nparts - 1 && stmt->global_index)
+					{
+						elog(DEBUG2, "mark as last partitioned to scan");
+						childStmt->globalIndexPart = 1;
+					}
+
+					if (i == 0 && stmt->global_index)
+					{
+						elog(DEBUG2, "mark as first partitioned to scan");
+						childStmt->globalIndexPart = -1;
+					}
+
+					if (stmt->global_index)
+					{
+						childStmt->nparts = nparts;
+						elog(DEBUG2, "total partitions to build global index %d", childStmt->nparts);
+					}
+
 					/*
 					 * We can't use the same index name for the child index,
 					 * so clear idxname to let the recursive invocation choose
@@ -4367,3 +4392,136 @@ set_indexsafe_procflags(void)
 	ProcGlobal->statusFlags[MyProc->pgxactoff] = MyProc->statusFlags;
 	LWLockRelease(ProcArrayLock);
 }
+
+bool
+PopulateGlobalSpool(Relation idxRel, Relation heapRel, IndexStmt *stmt)
+{
+	IndexInfo  *idxinfo;
+	int			numberOfKeyAttributes;
+	int			numberOfAttributes;
+	List	   *allIndexParams;
+	Oid			accessMethodId;
+	Form_pg_am	accessMethodForm;
+	char	   *accessMethodName;
+	HeapTuple	tuple;
+	bool		concurrent;
+	Oid		   *typeObjectId;
+	Oid		   *collationObjectId;
+	Oid		   *classObjectId;
+	int16	   *coloptions;
+	IndexAmRoutine *amRoutine;
+	bool		amcanorder;
+	Oid			root_save_userid;
+	int			root_save_sec_context;
+	int			root_save_nestlevel;
+
+	root_save_nestlevel = NewGUCNestLevel();
+
+	if (stmt->concurrent && get_rel_persistence(RelationGetRelid(heapRel)) != RELPERSISTENCE_TEMP)
+		concurrent = true;
+	else
+		concurrent = false;
+
+	allIndexParams = list_concat_copy(stmt->indexParams,
+									  stmt->indexIncludingParams);
+
+	numberOfKeyAttributes = list_length(stmt->indexParams);
+	numberOfAttributes = list_length(allIndexParams);
+
+	accessMethodName = stmt->accessMethod;
+	tuple = SearchSysCache1(AMNAME, PointerGetDatum(accessMethodName));
+	if (!HeapTupleIsValid(tuple))
+	{
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_OBJECT),
+				 errmsg("access method \"%s\" does not exist",
+						accessMethodName)));
+	}
+
+	accessMethodForm = (Form_pg_am) GETSTRUCT(tuple);
+	accessMethodId = accessMethodForm->oid;
+	amRoutine = GetIndexAmRoutine(accessMethodForm->amhandler);
+
+	GetUserIdAndSecContext(&root_save_userid, &root_save_sec_context);
+	SetUserIdAndSecContext(heapRel->rd_rel->relowner,
+						   root_save_sec_context | SECURITY_RESTRICTED_OPERATION);
+
+	idxinfo = makeIndexInfo(numberOfAttributes,
+							numberOfKeyAttributes,
+							accessMethodId,
+							NIL,	/* expressions, NIL for now */
+							make_ands_implicit((Expr *) stmt->whereClause),
+							stmt->unique,
+							stmt->nulls_not_distinct,
+							!concurrent,
+							concurrent);
+
+	typeObjectId = (Oid *) palloc(numberOfAttributes * sizeof(Oid));
+	collationObjectId = (Oid *) palloc(numberOfAttributes * sizeof(Oid));
+	classObjectId = (Oid *) palloc(numberOfAttributes * sizeof(Oid));
+	coloptions = (int16 *) palloc(numberOfAttributes * sizeof(int16));
+	amcanorder = amRoutine->amcanorder;
+
+	pfree(amRoutine);
+	ReleaseSysCache(tuple);
+
+	ComputeIndexAttrs(idxinfo,
+					  typeObjectId, collationObjectId, classObjectId,
+					  coloptions, allIndexParams,
+					  stmt->excludeOpNames, RelationGetRelid(heapRel),
+					  accessMethodName, accessMethodId,
+					  amcanorder, stmt->isconstraint, root_save_userid,
+					  root_save_sec_context, &root_save_nestlevel);
+
+	/* Fill global unique index related parameters */
+	idxinfo->ii_GlobalIndexPart = stmt->globalIndexPart;
+	idxinfo->ii_BuildGlobalSpool = true;
+	idxinfo->ii_Nparts = stmt->nparts;
+	idxinfo->ii_Global_index = stmt->global_index;
+
+	/*
+	 * Determine worker process details for parallel CREATE INDEX.  Currently,
+	 * only btree has support for parallel builds.
+	 */
+	if (IsNormalProcessingMode() && idxRel->rd_rel->relam == BTREE_AM_OID)
+	{
+		idxinfo->ii_ParallelWorkers =
+			plan_create_index_workers(RelationGetRelid(heapRel),
+									  RelationGetRelid(idxRel));
+	}
+
+	idxRel->rd_indam->ambuild(heapRel, idxRel,
+							  idxinfo);
+
+	return true;
+}
+
+void
+ChangeRelKind(Relation idxRel, char kind)
+{
+	Relation	pg_class;
+	HeapTuple	tuple;
+	Form_pg_class classform;
+
+	/*
+	 * Get a writable copy of the pg_class tuple for the given relation.
+	 */
+	pg_class = table_open(RelationRelationId, RowExclusiveLock);
+
+	tuple = SearchSysCacheCopy1(RELOID,
+								ObjectIdGetDatum(RelationGetRelid(idxRel)));
+	if (!HeapTupleIsValid(tuple))
+		elog(ERROR, "could not find tuple for relation %u",
+			 RelationGetRelid(idxRel));
+
+	classform = (Form_pg_class) GETSTRUCT(tuple);
+
+	classform->relkind = kind;
+	idxRel->rd_rel->relkind = kind;
+
+	CatalogTupleUpdate(pg_class, &tuple->t_self, tuple);
+
+	heap_freetuple(tuple);
+
+	table_close(pg_class, RowExclusiveLock);
+}
diff --git a/src/backend/storage/file/sharedfileset.c b/src/backend/storage/file/sharedfileset.c
index 0782f50ba6..d82d1abcc6 100644
--- a/src/backend/storage/file/sharedfileset.c
+++ b/src/backend/storage/file/sharedfileset.c
@@ -91,6 +91,16 @@ SharedFileSetDeleteAll(SharedFileSet *fileset)
 	FileSetDeleteAll(&fileset->fs);
 }
 
+/*
+ * Register cleanup callback of an already initialized fileset.
+ */
+void
+SharedFileSetRegisterCleanupCallback(SharedFileSet *fileset, dsm_segment *seg)
+{
+	/* Register our cleanup callback. */
+	if (seg)
+		on_dsm_detach(seg, SharedFileSetOnDetach, PointerGetDatum(fileset));
+}
 /*
  * Callback function that will be invoked when this backend detaches from a
  * DSM segment holding a SharedFileSet that it has created or attached to.  If
diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c
index 416f02ba3c..ef613e2fe2 100644
--- a/src/backend/utils/sort/tuplesort.c
+++ b/src/backend/utils/sort/tuplesort.c
@@ -328,6 +328,7 @@ struct Tuplesortstate
 	 */
 	int64		abbrevNext;		/* Tuple # at which to next check
 								 * applicability */
+	bool        isglobalsort;
 
 	/*
 	 * Resource snapshot for time of sort start.
@@ -2190,7 +2191,14 @@ mergeruns(Tuplesortstate *state)
 				/* Tell logtape.c we won't be writing anymore */
 				LogicalTapeSetForgetFreeSpace(state->tapeset);
 				/* Initialize for the final merge pass */
-				beginmerge(state);
+				if (state->isglobalsort)
+				{
+					elog(DEBUG2, "global unique index final merge run...");
+					mergeonerun(state);
+				}
+				else
+					beginmerge(state);
+
 				state->status = TSS_FINALMERGE;
 				return;
 			}
@@ -3002,6 +3010,51 @@ tuplesort_attach_shared(Sharedsort *shared, dsm_segment *seg)
 	SharedFileSetAttach(&shared->fileset, seg);
 }
 
+void
+tuplesort_mark_global_sort(Tuplesortstate *state)
+{
+	state->isglobalsort = true;
+}
+
+void
+tuplesort_copy_sharedsort(Sharedsort *shared1, Sharedsort *shared2)
+{
+	if (!shared1 || !shared2)
+		elog(ERROR, "%s: cannot do sharedsort copy due to bad input", __FUNCTION__);
+
+	shared1->currentWorker = shared2->currentWorker;
+	shared1->mutex = shared2->mutex;
+	shared1->nTapes = shared2->nTapes;
+	shared1->fileset = shared2->fileset;
+	shared1->workersFinished = shared2->workersFinished;
+	memcpy(shared1->tapes, shared2->tapes, sizeof(TapeShare) * shared2->nTapes);
+}
+
+void
+tuplesort_copy_sharedsort2(Sharedsort *shared1, Tuplesortstate *state)
+{
+	if (!shared1 || !state)
+		elog(ERROR, "%s: cannot do sharedsort copy due to bad input", __FUNCTION__);
+
+	shared1->currentWorker = state->shared->currentWorker;
+	shared1->mutex = state->shared->mutex;
+	shared1->nTapes = state->shared->nTapes;
+	shared1->fileset = state->shared->fileset;
+	shared1->workersFinished = state->shared->workersFinished;
+	memcpy(shared1->tapes, state->shared->tapes, sizeof(TapeShare) * state->shared->nTapes);
+}
+
+int
+tuplesort_get_curr_workers(Sharedsort *shared)
+{
+	return shared->currentWorker;
+}
+
+void tuplesort_register_cleanup_callback(Sharedsort *shared, dsm_segment *seg)
+{
+	SharedFileSetRegisterCleanupCallback(&shared->fileset, seg);
+}
+
 /*
  * worker_get_identifier - Assign and return ordinal identifier for worker
  *
diff --git a/src/include/commands/defrem.h b/src/include/commands/defrem.h
index 1d3ce246c9..1593357d5d 100644
--- a/src/include/commands/defrem.h
+++ b/src/include/commands/defrem.h
@@ -47,6 +47,8 @@ extern bool CheckIndexCompatible(Oid oldId,
 extern Oid	GetDefaultOpClass(Oid type_id, Oid am_id);
 extern Oid	ResolveOpClass(List *opclass, Oid attrType,
 						   const char *accessMethodName, Oid accessMethodId);
+extern bool PopulateGlobalSpool(Relation ixsRel, Relation heapRel, IndexStmt *stmt);
+extern void ChangeRelKind(Relation idxRel, char kind);
 
 /* commands/functioncmds.c */
 extern ObjectAddress CreateFunction(ParseState *pstate, CreateFunctionStmt *stmt);
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 01b1727fc0..c2c8039d3d 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -197,6 +197,11 @@ typedef struct IndexInfo
 	int			ii_ParallelWorkers;
 	Oid			ii_Am;
 	void	   *ii_AmCache;
+	bool		ii_Global_index;	/* true if index is global */
+	int			ii_GlobalIndexPart; /* partition number indication */
+	bool		ii_BuildGlobalSpool;	/* indicate to build global spool only */
+	int			ii_Nparts;		/* num partitions for global index build in
+								 * parallel */
 	MemoryContext ii_Context;
 } IndexInfo;
 
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index 880dd6011a..8febb506f0 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -2992,6 +2992,10 @@ typedef struct IndexStmt
 	bool		reset_default_tblspc;	/* reset default_tablespace prior to
 										 * executing */
 	bool		global_index;	/* true if index is global */
+	int			globalIndexPart;	/* partition number indication */
+	int			nparts;			/* num partitions for global index build in
+								 * parallel */
+
 } IndexStmt;
 
 /* ----------------------
diff --git a/src/include/storage/sharedfileset.h b/src/include/storage/sharedfileset.h
index b1cde36d0b..50580d5140 100644
--- a/src/include/storage/sharedfileset.h
+++ b/src/include/storage/sharedfileset.h
@@ -33,5 +33,6 @@ typedef struct SharedFileSet
 extern void SharedFileSetInit(SharedFileSet *fileset, dsm_segment *seg);
 extern void SharedFileSetAttach(SharedFileSet *fileset, dsm_segment *seg);
 extern void SharedFileSetDeleteAll(SharedFileSet *fileset);
+extern void SharedFileSetRegisterCleanupCallback(SharedFileSet *fileset, dsm_segment *seg);
 
 #endif
diff --git a/src/include/utils/tuplesort.h b/src/include/utils/tuplesort.h
index 15f2a4a795..ccc389c1f4 100644
--- a/src/include/utils/tuplesort.h
+++ b/src/include/utils/tuplesort.h
@@ -247,6 +247,16 @@ typedef struct
 			elog(ERROR, "unexpected end of data"); \
 	} while(0)
 
+extern void tuplesort_mark_global_sort(Tuplesortstate *state);
+
+extern void tuplesort_copy_sharedsort(Sharedsort *shared1, Sharedsort *shared2);
+
+extern void tuplesort_copy_sharedsort2(Sharedsort *shared1, Tuplesortstate *state);
+
+extern int	tuplesort_get_curr_workers(Sharedsort *shared);
+
+extern void tuplesort_register_cleanup_callback(Sharedsort *shared, dsm_segment *seg);
+
 /*
  * We provide multiple interfaces to what is essentially the same code,
  * since different callers have different data to be sorted and want to
diff --git a/src/test/regress/expected/indexing.out b/src/test/regress/expected/indexing.out
index eed20063c1..30a3ce0f20 100644
--- a/src/test/regress/expected/indexing.out
+++ b/src/test/regress/expected/indexing.out
@@ -1461,3 +1461,15 @@ Partitions: gidxpart1_b_idx,
 
 drop index gidx_u;
 drop table gidxpart;
+-- Test the cross-partition uniqueness with non-partition key with global unique index
+create table gidxpart (a int, b int, c text) partition by range (a);
+create table gidxpart1 partition of gidxpart for values from (0) to (100000);
+create table gidxpart2 partition of gidxpart for values from (100000) to (199999);
+insert into gidxpart (a, b, c) values (42, 572814, 'inserted first on gidxpart1');
+insert into gidxpart (a, b, c) values (150000, 572814, 'inserted second on gidxpart2');
+create unique index on gidxpart (b) global; -- should fail
+ERROR:  could not create unique index "gidxpart1_b_idx"
+DETAIL:  Key (b)=(572814) is duplicated.
+delete from gidxpart where a = 150000 and b = 572814;
+create unique index on gidxpart (b) global;
+drop table gidxpart;
diff --git a/src/test/regress/sql/indexing.sql b/src/test/regress/sql/indexing.sql
index 2169f28e69..84dde4df93 100644
--- a/src/test/regress/sql/indexing.sql
+++ b/src/test/regress/sql/indexing.sql
@@ -771,3 +771,15 @@ select relname, relhasindex, relkind from pg_class where relname like '%gidx%' o
 \d+ gidx_u
 drop index gidx_u;
 drop table gidxpart;
+
+-- Test the cross-partition uniqueness with non-partition key with global unique index
+create table gidxpart (a int, b int, c text) partition by range (a);
+create table gidxpart1 partition of gidxpart for values from (0) to (100000);
+create table gidxpart2 partition of gidxpart for values from (100000) to (199999);
+insert into gidxpart (a, b, c) values (42, 572814, 'inserted first on gidxpart1');
+insert into gidxpart (a, b, c) values (150000, 572814, 'inserted second on gidxpart2');
+create unique index on gidxpart (b) global; -- should fail
+delete from gidxpart where a = 150000 and b = 572814;
+create unique index on gidxpart (b) global;
+drop table gidxpart;
+
-- 
2.17.1

