Attached is a patch that exports a new function from logtape.c:

   extern LogicalTapeSet *LogicalTapeSetExtend(
            LogicalTapeSet *lts, int nAdditional);

The purpose is to allow adding new tapes dynamically for the upcoming
Hash Aggregation work[0]. HashAgg doesn't know in advance how many
tapes it will need, though only a limited number are actually active at
a time.

This was proposed and originally written by Adam Lee[1] (extract only
the changes to logtape.c/h from his posted patch). Strangely, I'm
seeing ~5% regression with his version when doing:

    -- t20m_1_int4 has 20 million random integers
    select * from t20m_1_int4 order by i offset 100000000;

Which seems to be due to using a pointer rather than a flexible array
member (I'm using gcc[2]). My version (attached) still uses a flexible
array member, which doesn't see the regression; but I repalloc the
whole struct so the caller needs to save the new pointer to the tape
set.

That doesn't entirely make sense to me, and I'm wondering if someone
else can repro that result and/or make a suggestion, because I don't
have a good explanation. I'm fine with my version of the patch, but it
would be nice to know why there's such a big difference using a pointer
versus a flexible array member.

Regards,
        Jeff Davis

[0] 
https://postgr.es/m/6e7c269b9a84ff75fefcad8ab2d4758f03581e98.camel%40j-davis.com
[1] https://postgr.es/m/20200108071202.GA1511@mars.local
[2] gcc (Ubuntu 7.4.0-1ubuntu1~18.04.1) 7.4.0

diff --git a/src/backend/utils/sort/logtape.c b/src/backend/utils/sort/logtape.c
index 4f78b55fbaf..36104a73a75 100644
--- a/src/backend/utils/sort/logtape.c
+++ b/src/backend/utils/sort/logtape.c
@@ -201,6 +201,7 @@ static long ltsGetFreeBlock(LogicalTapeSet *lts);
 static void ltsReleaseBlock(LogicalTapeSet *lts, long blocknum);
 static void ltsConcatWorkerTapes(LogicalTapeSet *lts, TapeShare *shared,
 								 SharedFileSet *fileset);
+static void ltsInitTape(LogicalTape *lt);
 static void ltsInitReadBuffer(LogicalTapeSet *lts, LogicalTape *lt);
 
 
@@ -536,6 +537,30 @@ ltsConcatWorkerTapes(LogicalTapeSet *lts, TapeShare *shared,
 	lts->nHoleBlocks = lts->nBlocksAllocated - nphysicalblocks;
 }
 
+/*
+ * Initialize per-tape struct.  Note we allocate the I/O buffer and the first
+ * block for a tape only when it is first actually written to.  This avoids
+ * wasting memory space when tuplesort.c overestimates the number of tapes
+ * needed.
+ */
+static void
+ltsInitTape(LogicalTape *lt)
+{
+	lt->writing           = true;
+	lt->frozen            = false;
+	lt->dirty             = false;
+	lt->firstBlockNumber  = -1L;
+	lt->curBlockNumber    = -1L;
+	lt->nextBlockNumber   = -1L;
+	lt->offsetBlockNumber = 0L;
+	lt->buffer            = NULL;
+	lt->buffer_size       = 0;
+	/* palloc() larger than MaxAllocSize would fail */
+	lt->max_size          = MaxAllocSize;
+	lt->pos               = 0;
+	lt->nbytes            = 0;
+}
+
 /*
  * Lazily allocate and initialize the read buffer. This avoids waste when many
  * tapes are open at once, but not all are active between rewinding and
@@ -579,7 +604,6 @@ LogicalTapeSetCreate(int ntapes, TapeShare *shared, SharedFileSet *fileset,
 					 int worker)
 {
 	LogicalTapeSet *lts;
-	LogicalTape *lt;
 	int			i;
 
 	/*
@@ -597,29 +621,8 @@ LogicalTapeSetCreate(int ntapes, TapeShare *shared, SharedFileSet *fileset,
 	lts->nFreeBlocks = 0;
 	lts->nTapes = ntapes;
 
-	/*
-	 * Initialize per-tape structs.  Note we allocate the I/O buffer and the
-	 * first block for a tape only when it is first actually written to.  This
-	 * avoids wasting memory space when tuplesort.c overestimates the number
-	 * of tapes needed.
-	 */
 	for (i = 0; i < ntapes; i++)
-	{
-		lt = &lts->tapes[i];
-		lt->writing = true;
-		lt->frozen = false;
-		lt->dirty = false;
-		lt->firstBlockNumber = -1L;
-		lt->curBlockNumber = -1L;
-		lt->nextBlockNumber = -1L;
-		lt->offsetBlockNumber = 0L;
-		lt->buffer = NULL;
-		lt->buffer_size = 0;
-		/* palloc() larger than MaxAllocSize would fail */
-		lt->max_size = MaxAllocSize;
-		lt->pos = 0;
-		lt->nbytes = 0;
-	}
+		ltsInitTape(&lts->tapes[i]);
 
 	/*
 	 * Create temp BufFile storage as required.
@@ -1004,6 +1007,29 @@ LogicalTapeFreeze(LogicalTapeSet *lts, int tapenum, TapeShare *share)
 	}
 }
 
+/*
+ * Add additional tapes to this tape set. Not intended to be used when any
+ * tapes are frozen.
+ */
+LogicalTapeSet *
+LogicalTapeSetExtend(LogicalTapeSet *lts, int nAdditional)
+{
+	int     i;
+	int		nTapesOrig = lts->nTapes;
+	Size	newSize;
+
+	lts->nTapes += nAdditional;
+	newSize = offsetof(LogicalTapeSet, tapes) +
+		lts->nTapes * sizeof(LogicalTape);
+
+	lts = (LogicalTapeSet *) repalloc(lts, newSize);
+
+	for (i = nTapesOrig; i < lts->nTapes; i++)
+		ltsInitTape(&lts->tapes[i]);
+
+	return lts;
+}
+
 /*
  * Backspace the tape a given number of bytes.  (We also support a more
  * general seek interface, see below.)
diff --git a/src/include/utils/logtape.h b/src/include/utils/logtape.h
index 695d2c00ee4..3ebe52239f8 100644
--- a/src/include/utils/logtape.h
+++ b/src/include/utils/logtape.h
@@ -67,6 +67,8 @@ extern void LogicalTapeRewindForRead(LogicalTapeSet *lts, int tapenum,
 extern void LogicalTapeRewindForWrite(LogicalTapeSet *lts, int tapenum);
 extern void LogicalTapeFreeze(LogicalTapeSet *lts, int tapenum,
 							  TapeShare *share);
+extern LogicalTapeSet *LogicalTapeSetExtend(LogicalTapeSet *lts,
+											int nAdditional);
 extern size_t LogicalTapeBackspace(LogicalTapeSet *lts, int tapenum,
 								   size_t size);
 extern void LogicalTapeSeek(LogicalTapeSet *lts, int tapenum,

Reply via email to