diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c
index 1452e8c..8ab2e9d 100644
--- a/src/backend/utils/sort/tuplesort.c
+++ b/src/backend/utils/sort/tuplesort.c
@@ -267,12 +267,14 @@ struct Tuplesortstate
 	 * and FINALMERGE, the tuples are organized in "heap" order per Algorithm
 	 * H.  (Note that memtupcount only counts the tuples that are part of the
 	 * heap --- during merge passes, memtuples[] entries beyond tapeRange are
-	 * never in the heap and are used to hold pre-read tuples.)  In state
-	 * SORTEDONTAPE, the array is not used.
+	 * never in the heap and are used to hold pre-read tuples; and while
+	 * building runs, we temporarily stash tuples destined for the next run
+	 * at the end of the array.)  In state SORTEDONTAPE, the array is not used.
 	 */
 	SortTuple  *memtuples;		/* array of SortTuple structs */
 	int			memtupcount;	/* number of tuples currently present */
 	int			memtupsize;		/* allocated length of memtuples array */
+	int			memtupnextrun;	/* tuples saved at end for next run */
 
 	/*
 	 * While building initial runs, this is the current output run number
@@ -1169,13 +1171,12 @@ puttuple_common(Tuplesortstate *state, SortTuple *tuple)
 		case TSS_BUILDRUNS:
 
 			/*
-			 * Insert the tuple into the heap, with run number currentRun if
-			 * it can go into the current run, else run number currentRun+1.
-			 * The tuple can go into the current run if it is >= the first
-			 * not-yet-output tuple.  (Actually, it could go into the current
-			 * run if it is >= the most recently output tuple ... but that
-			 * would require keeping around the tuple we last output, and it's
-			 * simplest to let writetup free each tuple as soon as it's
+			 * Insert the tuple into the heap.  The tuple can go into the
+			 * current run if it is >= the first not-yet-output tuple; if not,
+			 * we save it for the next run.  (Actually, it could go into the
+			 * current run if it is >= the most recently output tuple ... but
+			 * that would require keeping around the tuple we last output, and
+			 * it's simplest to let writetup free each tuple as soon as it's
 			 * written.)
 			 *
 			 * Note there will always be at least one tuple in the heap at
@@ -1183,9 +1184,12 @@ puttuple_common(Tuplesortstate *state, SortTuple *tuple)
 			 */
 			Assert(state->memtupcount > 0);
 			if (COMPARETUP(state, tuple, &state->memtuples[0]) >= 0)
-				tuplesort_heap_insert(state, tuple, state->currentRun, true);
+				tuplesort_heap_insert(state, tuple, state->currentRun, false);
 			else
-				tuplesort_heap_insert(state, tuple, state->currentRun + 1, true);
+			{
+				state->memtuples[state->memtupsize - ++state->memtupnextrun] =
+					*tuple;
+			}
 
 			/*
 			 * If we are over the memory limit, dump tuples till we're under.
@@ -2173,7 +2177,7 @@ dumptuples(Tuplesortstate *state, bool alltuples)
 {
 	while (alltuples ||
 		   (LACKMEM(state) && state->memtupcount > 1) ||
-		   state->memtupcount >= state->memtupsize)
+		   state->memtupcount + state->memtupnextrun >= state->memtupsize)
 	{
 		/*
 		 * Dump the heap's frontmost entry, and sift up to remove it from the
@@ -2185,12 +2189,13 @@ dumptuples(Tuplesortstate *state, bool alltuples)
 		tuplesort_heap_siftup(state, true);
 
 		/*
-		 * If the heap is empty *or* top run number has changed, we've
-		 * finished the current run.
+		 * If the heap is empty, we've finished the current run; we are done
+		 * altogether only if no tuples were saved for the next run.
 		 */
-		if (state->memtupcount == 0 ||
-			state->currentRun != state->memtuples[0].tupindex)
+		if (state->memtupcount == 0)
 		{
+			int		index;
+
 			markrunend(state, state->tp_tapenum[state->destTape]);
 			state->currentRun++;
 			state->tp_runs[state->destTape]++;
@@ -2207,10 +2212,33 @@ dumptuples(Tuplesortstate *state, bool alltuples)
 			/*
-			 * Done if heap is empty, else prepare for new run.
+			 * Done if no tuples were saved for the next run, else prepare
+			 * for new run.
 			 */
-			if (state->memtupcount == 0)
+			if (state->memtupnextrun == 0)
 				break;
-			Assert(state->currentRun == state->memtuples[0].tupindex);
 			selectnewtape(state);
+
+			/*
+			 * Build a heap out of the tuples saved for the next run.  We
+			 * must insert these in ascending index order; otherwise, the
+			 * growing heap might clobber an entry that hasn't been added
+			 * yet.
+			 *
+			 * XXX. We pass false to tuplesort_heap_insert() here since the
+			 * entire heap has the same run number at this point, but it
+			 * would be better to get rid of the tupIndex stuff altogether.
+			 */
+			index = state->memtupsize - state->memtupnextrun;
+			while (index < state->memtupsize)
+				tuplesort_heap_insert(state, &state->memtuples[index++],
+									  state->currentRun, false);
+			state->memtupnextrun = 0;
+
+			/*
+			 * The heap was empty when we got here, so its top entry is only
+			 * meaningful (and checkable) now that it has been rebuilt.
+			 */
+			Assert(state->memtupcount > 0);
+			Assert(state->currentRun == state->memtuples[0].tupindex);
 		}
 	}
 }
