commit ee29bccc4540f61dca283e4404ff140705628552
Author: Chao Li (Evan) <lic@highgo.com>
Date:   Sat Dec 6 07:03:14 2025 +0800

    the other implmentation

diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c
index 028c5b71c27..5e3e49ba22e 100644
--- a/src/backend/utils/sort/tuplesort.c
+++ b/src/backend/utils/sort/tuplesort.c
@@ -632,6 +632,7 @@ typedef struct RadixPartitionInfo
 		size_t		count;
 		size_t		offset;
 	};
+	size_t		begin_offset;
 	size_t		next_offset;
 }			RadixPartitionInfo;
 
@@ -2763,13 +2764,14 @@ static void
 radix_sort_tuple(SortTuple *begin, size_t n_elems, int level, Tuplesortstate *state)
 {
 	RadixPartitionInfo partitions[256] = {0};
-	uint8_t		remaining_partitions[256] = {0};
+	uint8_t		remaining_partitions[256];
 	size_t		total = 0;
 	int			num_partitions = 0;
-	int			num_remaining;
+	//int			num_remaining;
 	SortSupport ssup = &state->base.sortKeys[0];
-	size_t		start_offset = 0;
-	SortTuple  *partition_begin = begin;
+	SortTuple pending, tmp;
+	//size_t		start_offset = 0;
+	//SortTuple  *partition_begin = begin;
 
 	/* count number of occurrences of each byte */
 	for (SortTuple *tup = begin; tup < begin + n_elems; tup++)
@@ -2796,6 +2798,7 @@ radix_sort_tuple(SortTuple *begin, size_t n_elems, int level, Tuplesortstate *st
 		if (count != 0)
 		{
 			partitions[i].offset = total;
+			partitions[i].begin_offset = total;
 			total += count;
 			remaining_partitions[num_partitions] = i;
 			num_partitions++;
@@ -2803,7 +2806,7 @@ radix_sort_tuple(SortTuple *begin, size_t n_elems, int level, Tuplesortstate *st
 		partitions[i].next_offset = total;
 	}
 
-	num_remaining = num_partitions;
+	//num_remaining = num_partitions;
 
 	/*
 	 * Swap tuples to correct partition.
@@ -2825,45 +2828,75 @@ radix_sort_tuple(SortTuple *begin, size_t n_elems, int level, Tuplesortstate *st
 	 * previous loop iteration results in only one partition that hasn't been
 	 * counted as sorted, we know it's actually sorted and can exit the loop.
 	 */
-	while (num_remaining > 1)
+	// while (num_remaining > 1)
+	// {
+	// 	/* start the count over */
+	// 	num_remaining = num_partitions;
+
+	// 	for (int i = 0; i < num_partitions; i++)
+	// 	{
+	// 		uint8		idx = remaining_partitions[i];
+
+	// 		for (SortTuple *st = begin + partitions[idx].offset;
+	// 			 st < begin + partitions[idx].next_offset;
+	// 			 st++)
+	// 		{
+	// 			size_t		offset = partitions[st->current_byte].offset++;
+	// 			SortTuple	tmp;
+
+	// 			/* swap current tuple with destination position */
+	// 			Assert(offset < n_elems);
+	// 			tmp = *st;
+	// 			*st = begin[offset];
+	// 			begin[offset] = tmp;
+
+	// 			CHECK_FOR_INTERRUPTS();
+	// 		};
+
+	// 		/* count sorted partitions */
+	// 		if (partitions[idx].offset == partitions[idx].next_offset)
+	// 			num_remaining--;
+	// 	}
+	// }
+	if (num_partitions > 1)
 	{
-		/* start the count over */
-		num_remaining = num_partitions;
-
-		for (int i = 0; i < num_partitions; i++)
+		for (int i = 0; i < n_elems; i++)
 		{
-			uint8		idx = remaining_partitions[i];
+			SortTuple *st = begin + i;
+
+			if (i >= partitions[st->current_byte].begin_offset &&
+				i < partitions[st->current_byte].next_offset)
+				continue;
 
-			for (SortTuple *st = begin + partitions[idx].offset;
-				 st < begin + partitions[idx].next_offset;
-				 st++)
+			while (true)
 			{
-				size_t		offset = partitions[st->current_byte].offset++;
-				SortTuple	tmp;
+				size_t		offset = partitions[st->current_byte].offset;
+
+				CHECK_FOR_INTERRUPTS();
+
+				/* target is the empty position, we are done */
+				if (offset == i)
+				{
+					begin[i] = pending;
+					break;
+				}
 
-				/* swap current tuple with destination position */
 				Assert(offset < n_elems);
 				tmp = *st;
-				*st = begin[offset];
+				pending = begin[offset];
 				begin[offset] = tmp;
-
-				CHECK_FOR_INTERRUPTS();
+				st = &pending;
+				partitions[begin[offset].current_byte].offset ++;
 			};
-
-			/* count sorted partitions */
-			if (partitions[idx].offset == partitions[idx].next_offset)
-				num_remaining--;
 		}
 	}
 
 	/* recurse */
-	for (uint8_t *rp = remaining_partitions;
-		 rp < remaining_partitions + num_partitions;
-		 rp++)
+	for (int i = 0; i < num_partitions; i++)
 	{
-		size_t		end_offset = partitions[*rp].next_offset;
-		SortTuple  *partition_end = begin + end_offset;
-		ptrdiff_t	num_elements = end_offset - start_offset;
+		uint8_t		rp = remaining_partitions[i];
+		SortTuple *partition_begin = begin + partitions[rp].begin_offset;
+		size_t	num_elements = partitions[rp].next_offset - partitions[rp].begin_offset;
 
 		if (num_elements > 1)
 		{
@@ -2897,9 +2930,6 @@ radix_sort_tuple(SortTuple *begin, size_t n_elems, int level, Tuplesortstate *st
 							state);
 			}
 		}
-
-		start_offset = end_offset;
-		partition_begin = partition_end;
 	}
 }
 
