Hi David,

>> I pushed the changes to WindowAgg so as not to call tuplestore_end()
>> on every partition.  Can you rebase this patch over that change?
>> 
>> It would be good to do this in a way that does not add any new state
>> to WindowAggState, you can see that I had to shuffle fields around in
>> that struct because the next_parition field would have caused the
>> struct to become larger. I've not looked closely, but I expect this
>> can be done by adding more code to tuplestore_updatemax() to also
>> track the disk space used if the current storage has gone to disk. I
>> expect the maxSpace field can be used for both, but we'd need another
>> bool field to track if the max used was by disk or memory.

I have created a patch in the direction you suggested.  See attached
patch (v1-0001-Enhance-tuplestore.txt). To not confuse CFbot, the
extension is "txt", not "patch".

>> I think the performance of this would also need to be tested as it
>> means doing an lseek() on every tuplestore_clear() when we've gone to
>> disk. Probably that will be dominated by all the other overheads of a
>> tuplestore going to disk (i.e. dumptuples() etc), but it would be good
>> to check this. I suggest setting work_mem = 64 and making a test case
>> that only just spills to disk. Maybe do a few thousand partitions
>> worth of that and see if you can measure any slowdown.

I copied your shell script and slightly modified it then ran pgbench
with (1 10 100 1000 5000 10000) window partitions (see attached shell
script). In the script I set work_mem to 64kB. It seems for 10000,
1000 and 100 partitions, the performance difference seems
noises. However, for 10, 2, 1 partitions. I see large performance
degradation with the patched version: patched is slower than stock
master in 1.5% (10 partitions), 41% (2 partitions) and 55.7% (1
partition). See the attached graph.
>From 4749d2018f33e883c292eb904f3253d393a47c99 Mon Sep 17 00:00:00 2001
From: Tatsuo Ishii <is...@postgresql.org>
Date: Thu, 5 Sep 2024 20:59:01 +0900
Subject: [PATCH v1] Enhance tuplestore.

Let tuplestore_updatemax() handle both memory and disk case.
---
 src/backend/utils/sort/tuplestore.c | 27 ++++++++++++++++++---------
 1 file changed, 18 insertions(+), 9 deletions(-)

diff --git a/src/backend/utils/sort/tuplestore.c 
b/src/backend/utils/sort/tuplestore.c
index 444c8e25c2..854121fc11 100644
--- a/src/backend/utils/sort/tuplestore.c
+++ b/src/backend/utils/sort/tuplestore.c
@@ -107,9 +107,10 @@ struct Tuplestorestate
        bool            backward;               /* store extra length words in 
file? */
        bool            interXact;              /* keep open through 
transactions? */
        bool            truncated;              /* tuplestore_trim has removed 
tuples? */
+       bool            inMem;                  /* true if maxSpace is for 
memory */
        int64           availMem;               /* remaining memory available, 
in bytes */
        int64           allowedMem;             /* total memory allowed, in 
bytes */
-       int64           maxSpace;               /* maximum space used in memory 
*/
+       int64           maxSpace;               /* maximum space used in memory 
or disk */
        int64           tuples;                 /* number of tuples added */
        BufFile    *myfile;                     /* underlying file, or NULL if 
none */
        MemoryContext context;          /* memory context for holding tuples */
@@ -262,6 +263,7 @@ tuplestore_begin_common(int eflags, bool interXact, int 
maxKBytes)
        state->eflags = eflags;
        state->interXact = interXact;
        state->truncated = false;
+       state->inMem = true;
        state->allowedMem = maxKBytes * 1024L;
        state->availMem = state->allowedMem;
        state->maxSpace = 0;
@@ -1497,8 +1499,17 @@ static void
 tuplestore_updatemax(Tuplestorestate *state)
 {
        if (state->status == TSS_INMEM)
+       {
                state->maxSpace = Max(state->maxSpace,
                                                          state->allowedMem - 
state->availMem);
+               state->inMem = true;
+       }
+       else
+       {
+               state->maxSpace = Max(state->maxSpace,
+                                                         
BufFileSize(state->myfile));
+               state->inMem = false;
+       }
 }
 
 /*
@@ -1509,7 +1520,7 @@ tuplestore_updatemax(Tuplestorestate *state)
 const char *
 tuplestore_storage_type_name(Tuplestorestate *state)
 {
-       if (state->status == TSS_INMEM)
+       if (state->inMem)
                return "Memory";
        else
                return "Disk";
@@ -1517,8 +1528,7 @@ tuplestore_storage_type_name(Tuplestorestate *state)
 
 /*
  * tuplestore_space_used
- *             Return the maximum space used in memory unless the tuplestore 
has spilled
- *             to disk, in which case, return the disk space used.
+ *             Return the maximum space used in memory or disk.
  */
 int64
 tuplestore_space_used(Tuplestorestate *state)
@@ -1526,10 +1536,7 @@ tuplestore_space_used(Tuplestorestate *state)
        /* First, update the maxSpace field */
        tuplestore_updatemax(state);
 
-       if (state->status == TSS_INMEM)
-               return state->maxSpace;
-       else
-               return BufFileSize(state->myfile);
+       return state->maxSpace;
 }
 
 /*
@@ -1601,7 +1608,9 @@ writetup_heap(Tuplestorestate *state, void *tup)
        if (state->backward)            /* need trailing length word? */
                BufFileWrite(state->myfile, &tuplen, sizeof(tuplen));
 
-       /* no need to call tuplestore_updatemax() when not in TSS_INMEM */
+       /* update maxSpace */
+       tuplestore_updatemax(state);
+
        FREEMEM(state, GetMemoryChunkSpace(tuple));
        heap_free_minimal_tuple(tuple);
 }
-- 
2.25.1

dbname=test
rows=10000
secs=10
ntests=3

psql -c "drop table if exists part_test;" $dbname
psql -c "create table part_test (a int not null);" $dbname
psql -c "insert into part_test select g.s from generate_series(1,$rows) g(s);" 
$dbname
psql -c "vacuum freeze analyze part_test;" $dbname
psql -c "alter system set work_mem = '64kB';" $dbname
psql -c "alter system set jit = 0;" $dbname
psql -c "select pg_reload_conf();" $dbname

for c in 1 10 100 1000 5000 10000
do
        echo "SELECT a,count(*) OVER (PARTITION BY a / $c) FROM part_test 
OFFSET $rows" > bench.sql
        echo "Testing with $(($rows / $c)) partitions"
        for i in $(seq 1 $ntests)
        do
                pgbench -n -f bench.sql -M prepared -T $secs $dbname | grep 
latency
        done
done

Reply via email to