Hi Melih.

Now that the design#1 ERRORs have been fixed, we returned to doing
performance measuring of the design#1 patch versus HEAD.

Unfortunately, we observed that under some particular conditions
(large transactions of 1000 inserts/tx for a busy apply worker, 100
empty tables to be synced) the performance was worse with the design#1
patch applied.

~~

RESULTS

Below are some recent measurements (for 100 empty tables to be synced
when apply worker is already busy). We vary the size of the published
transaction for the "busy" table, and you can see that for certain
large transaction sizes (1000 and 2000 inserts/tx) the design#1
performance was worse than HEAD:

~

The publisher "busy" table does commit every 10 inserts:
2w 4w 8w 16w
HEAD 3945 1138 1166 1205
HEAD+v24-0002 3559 886 355 490
%improvement 10% 22% 70% 59%

~

The publisher "busy" table does commit every 100 inserts:
2w 4w 8w 16w
HEAD 2363 1357 1354 1355
HEAD+v24-0002 2077 1358 762 756
%improvement 12% 0% 44% 44%

~

Publisher "busy" table does commit every 1000 inserts:
2w 4w 8w 16w
HEAD 11898 5855 1868 1631
HEAD+v24-0002 21905 8254 3531 1626
%improvement -84% -41% -89% 0%

^ Note - design#1 was slower than HEAD here

~

Publisher "busy" table does commit every 2000 inserts:
2w 4w 8w 16w
HEAD 21740 7109 3454 1703
HEAD+v24-0002 21585 10877 4779 2293
%improvement 1% -53% -38% -35%

^ Note - design#1 was slower than HEAD here

~

The publisher "busy" table does commit every 5000 inserts:
2w 4w 8w 16w
HEAD 36094 18105 8595 3567
HEAD+v24-0002 36305 18199 8151 3710
%improvement -1% -1% 5% -4%

~

The publisher "busy" table does commit every 10000 inserts:
2w 4w 8w 16w
HEAD 38077 18406 9426 5559
HEAD+v24-0002 36763 18027 8896 4166
%improvement 3% 2% 6% 25%

------

TEST SCRIPTS

The "busy apply" test scripts are basically the same as already posted
[1], but I have reattached the latest ones again anyway.

------
[1] 
https://www.postgresql.org/message-id/CAHut%2BPuNVNK2%2BA%2BR6eV8rKPNBHemCFE4NDtEYfpXbYr6SsvvBg%40mail.gmail.com

Kind Regards,
Peter Smith.
Fujitsu Australia
#!/bin/bash
#
# SUB
#
# First argument : number of tables
# Second argument : size[Byte] of each tables
# Third argument : max_sync_workers
# Fourth argument : execution numbers
#

port_pub=5431
data_pub=datapub

port_sub=5432
data_sub=datasub

echo '########################'
echo '# Check configurations #'
echo '########################'

declare num_tables
if [ -n "$1" ]; then
    num_tables=$1
else
    num_tables=10
fi

echo "$num_tables tables will be used while testing"

declare table_size
if [ -n "$2" ]; then
    table_size=$2
else
    table_size=0
fi

num_sync_workers=$3
run_no=$4

#
# Convert from table_size to number of tuples. The equation was
# found by my tests...
#

declare num_tuples
if [ $table_size == "10kB" ]
then
    num_tuples=3250
else
    num_tuples=0
fi

echo "$num_tuples tuples will be inserted to each tables"


echo '##########################'
echo '# IPC at subscriber-side #'
echo '##########################'

psql -U postgres -p $port_sub -a -c "CREATE SUBSCRIPTION ipc_from_publisher CONNECTION 'host=localhost user=postgres port=$port_pub' PUBLICATION ipc_at_publisher WITH(origin=NONE);"
psql -U postgres -p $port_sub -a -c "CREATE PUBLICATION ipc_at_subscriber FOR TABLE ipc;"

# wait a bit for the publisher-side to connect to this publication
sleep 5s

psql -U postgres -p $port_sub -a -c "INSERT INTO ipc VALUES('sub ipc ready');"
psql -U postgres -p $port_sub -a -c "CALL ipc_wait_for('pub ipc ready');"

echo '#################'
echo '# Create tables #'
echo '#################'
(
    echo "CREATE TABLE busy_tbl(a text);"

    echo "CREATE SCHEMA test_tables;"
    echo -e "SELECT 'CREATE TABLE test_tables.manytables_'||i||'(i int);' FROM generate_series(1, $num_tables) g(i) \gexec"

) | psql -U postgres -p $port_sub -a

echo '######################################'
echo '# Create subscription for busy table #'
echo '######################################'
(
    echo "CREATE SUBSCRIPTION mysub CONNECTION 'host=localhost user=postgres port=$port_pub' PUBLICATION mypub;"
    echo "INSERT INTO ipc VALUES ('mysub is created');"

) | psql -U postgres -p $port_sub -a


echo '########'
echo '# Test #'
echo '########'
(
    echo -e "CREATE OR REPLACE PROCEDURE log_rep_test(max INTEGER) AS \$\$
    DECLARE
        total_duration INTERVAL := '0';
        avg_duration FLOAT := 0.0;
        start_time TIMESTAMP;
        end_time TIMESTAMP;
    BEGIN
        start_time := clock_timestamp();

        -- time how long it takes for all the tablesyncs to become "ready"
        WHILE EXISTS (SELECT 1 FROM pg_subscription_rel WHERE srsubstate != 'r') LOOP
            COMMIT;
        END LOOP;

        end_time := clock_timestamp();

        total_duration := total_duration + (end_time - start_time);
    
        IF max > 0 THEN
            avg_duration := EXTRACT(EPOCH FROM total_duration) / max * 1000;
        END IF;
    
        RAISE NOTICE 'RESULT: %', avg_duration;
    END;
    \$\$ LANGUAGE plpgsql;
"
) | psql -U postgres -p $port_sub -a


psql -U postgres -p $port_sub -a -c "CALL ipc_wait_for('pub busy started');"

# This alter/refresh will cause all the test tables to participate in the subscription
psql -U postgres -p $port_sub -a -c "INSERT INTO ipc VALUES ('test started');"
psql -U postgres -p $port_sub -a -c "ALTER SUBSCRIPTION mysub REFRESH PUBLICATION;"
psql -U postgres -p $port_sub -a -c "CALL log_rep_test(1);"
psql -U postgres -p $port_sub -a -c "INSERT INTO ipc VALUES ('test finished');"

psql -U postgres -p $port_sub -a -c "CALL ipc_wait_for('pub busy finished');"
psql -U postgres -p $port_sub -a -c "SELECT count(*) FROM busy_tbl;"

psql -U postgres -p $port_sub -a -c "DROP SUBSCRIPTION mysub;"
psql -U postgres -p $port_sub -a -c "DROP SUBSCRIPTION ipc_from_publisher;"
psql -U postgres -p $port_sub -a -c "DROP PUBLICATION ipc_at_subscriber;"
#!/bin/bash
#
# PUB
#
# First argument : number of tables
# Second argument : size[Byte] of each tables
# Third argument : max_sync_workers
# Fourth argument : execution numbers
#

port_pub=5431
data_pub=datapub

port_sub=5432
data_sub=datasub

echo '########################'
echo '# Check configurations #'
echo '########################'

declare num_tables
if [ -n "$1" ]; then
    num_tables=$1
else
    num_tables=10
fi

echo "$num_tables tables will be used while testing"

declare table_size
if [ -n "$2" ]; then
    table_size=$2
else
    table_size=0
fi

num_sync_workers=$3
run_no=$4

#
# Convert from table_size to number of tuples. The equation was
# found by my tests...
#

declare num_tuples
if [ $table_size == "10kB" ]
then
    num_tuples=3250
else
    num_tuples=0
fi

echo "$num_tuples tuples will be inserted to each tables"


echo '#########################'
echo '# IPC at publisher-side #'
echo '#########################'

psql -U postgres -p $port_pub -a -c "CREATE PUBLICATION ipc_at_publisher FOR TABLE ipc;"

# wait a bit for the subscriber-side to connect to this publication
sleep 5s

psql -U postgres -p $port_pub -a -c "CREATE SUBSCRIPTION ipc_from_subscriber CONNECTION 'host=localhost user=postgres port=$port_sub' PUBLICATION ipc_at_subscriber WITH(origin=NONE);"

psql -U postgres -p $port_pub -a -c "INSERT INTO ipc VALUES('pub ipc ready');"
psql -U postgres -p $port_pub -a -c "CALL ipc_wait_for('sub ipc ready');"

psql -U postgres -p $port_pub -a -c "SELECT * FROM ipc ORDER BY ts;"


echo '#############################################'
echo '# Create tables, and populate the test data #'
echo '#############################################'
(
    echo "CREATE TABLE busy_tbl(a text);"

    echo "CREATE SCHEMA test_tables;"	
    echo -e "SELECT 'CREATE TABLE test_tables.manytables_'||i||'(i int);' FROM generate_series(1, $num_tables) g(i) \gexec"
    echo -e "SELECT 'INSERT INTO test_tables.manytables_'||i||' VALUES (generate_series(1, $num_tuples))' FROM generate_series(1, $num_tables) g(i) \gexec"

) | psql -U postgres -p $port_pub -a


echo '##############'
echo '# Busy table #'
echo '##############'

psql -U postgres -p $port_pub -a -c "CREATE PUBLICATION mypub FOR TABLE busy_tbl;"

# wait a bit for the subscriber-side to connect to this publication
sleep 5s

psql -U postgres -p $port_pub -a -c "CALL ipc_wait_for('mysub is created');"
psql -U postgres -p $port_pub -a -c "SELECT * FROM ipc ORDER BY ts;"

echo '#####################'
echo '# Alter publication #'
echo '#####################'

psql -U postgres -p $port_pub -a -c "ALTER PUBLICATION mypub ADD TABLES IN SCHEMA test_tables;"

echo '################'
echo '# Stay busy... #'
echo '################'
(
    echo -e "CREATE OR REPLACE PROCEDURE stay_busy() AS \$\$
    DECLARE
      counter INTEGER := 0;
      commit_counter INTEGER := 0;
    BEGIN
      EXECUTE 'INSERT INTO ipc VALUES(''pub busy started'');';
      RAISE NOTICE 'START: stay_busy';
      WHILE NOT EXISTS (SELECT 1 FROM ipc WHERE msg = 'test finished') LOOP
        EXECUTE 'INSERT INTO busy_tbl VALUES(''some data'');';
        counter := counter + 1;
        commit_counter := commit_counter + 1;
        IF commit_counter = 1000 THEN
          COMMIT;
          commit_counter := 0;
        END IF;
      END LOOP;
      COMMIT;
      EXECUTE 'INSERT INTO ipc VALUES(''pub busy finished'');';
      RAISE NOTICE 'END: stay_busy inserted % records', counter;
    END;
    \$\$ LANGUAGE plpgsql;
"
) | psql -U postgres -p $port_pub -a

psql -U postgres -p $port_pub -a -c "CALL stay_busy();"
psql -U postgres -p $port_pub -a -c "SELECT * FROM ipc ORDER BY ts;"

# wait a bit for the subscriber-side to drop the subscription
sleep 5s

psql -U postgres -p $port_pub -a -c "DROP PUBLICATION mypub;"
psql -U postgres -p $port_pub -a -c "DROP SUBSCRIPTION ipc_from_subscriber;"
psql -U postgres -p $port_pub -a -c "DROP PUBLICATION ipc_at_publisher;"

#!/bin/bash
#
# First argument : number of tables
# Second argument : size[Byte] of each tables
# Third argument : max_sync_workers
# Fourth argument : execution numbers
# Fifth argument: test prefix
#

port_pub=5431
data_pub=datapub

port_sub=5432
data_sub=datasub

echo '########################'
echo '# Check configurations #'
echo '########################'

declare num_tables
if [ -n "$1" ]; then
    num_tables=$1
else
    num_tables=10
fi

echo "$num_tables tables will be used while testing"

declare table_size
if [ -n "$2" ]; then
    table_size=$2
else
    table_size=0
fi

num_sync_workers=$3
run_no=$4
prefix=$5

# logname must be defined here

log_pub=pub_${prefix}_${num_tables}_${table_size}_${num_sync_workers}_${run_no}.log
log_sub=sub_${prefix}_${num_tables}_${table_size}_${num_sync_workers}_${run_no}.log

#
# Convert from table_size to number of tuples. The equation was
# found by my tests...
#

declare num_tuples
if [ $table_size == "10kB" ]
then
    num_tuples=3250
else
    num_tuples=0
fi

echo "$num_tuples tuples will be inserted to each tables"


echo '############'
echo '# Clean up #'
echo '############'

pg_ctl stop -D $data_pub -w
pg_ctl stop -D $data_sub -w

rm -rf $data_pub $data_sub

echo '##########'
echo '# Set up #'
echo '##########'

initdb -D $data_pub -U postgres
initdb -D $data_sub -U postgres

cat << EOF >> $data_pub/postgresql.conf
wal_level = logical
wal_sender_timeout = 0
wal_receiver_timeout = 0
port = $port_pub
shared_buffers = 40GB
max_worker_processes = 32
max_parallel_maintenance_workers = 24
max_parallel_workers = 32
synchronous_commit = off
checkpoint_timeout = 1d
max_wal_size = 24GB
min_wal_size = 15GB
autovacuum = off
max_sync_workers_per_subscription = 8
log_line_prefix = '%n [%p] '
max_wal_senders = 200
max_replication_slots = 200
EOF

cat << EOF >> $data_sub/postgresql.conf
wal_level = logical
wal_sender_timeout = 0
wal_receiver_timeout = 0
port = $port_sub
shared_buffers = 40GB
max_worker_processes = 32
max_parallel_maintenance_workers = 24
max_parallel_workers = 32
synchronous_commit = off
checkpoint_timeout = 1d
max_wal_size = 24GB
min_wal_size = 15GB
autovacuum = off
max_sync_workers_per_subscription = $num_sync_workers
max_logical_replication_workers = 50
log_line_prefix = '%n [%p] '
max_replication_slots = 200
EOF

pg_ctl -D $data_pub start -w -l $log_pub
pg_ctl -D $data_sub start -w -l $log_sub

#
# Setup the IPC tables and Procedures
#
(
    echo "CREATE TABLE ipc(msg text);"
    echo "ALTER TABLE ipc ADD COLUMN ts TIMESTAMP;"
    echo "ALTER TABLE ipc ALTER COLUMN ts SET DEFAULT now();"
    echo "CREATE OR REPLACE PROCEDURE ipc_wait_for(state TEXT) AS \$\$
    DECLARE
    BEGIN
        RAISE NOTICE 'wait for state %', state;
        WHILE NOT EXISTS (SELECT 1 FROM ipc WHERE msg = state) LOOP
            RAISE NOTICE 'waiting until state %', state;
            EXECUTE 'SELECT pg_sleep(0.1);';
         END LOOP;
         RAISE NOTICE 'ok, reached state %', state;
    END;
    \$\$ LANGUAGE plpgsql;
"
) | psql -U postgres -p $port_pub -a

(
    echo "CREATE TABLE ipc(msg text);"
    echo "ALTER TABLE ipc ADD COLUMN ts TIMESTAMP;"
    echo "ALTER TABLE ipc ALTER COLUMN ts SET DEFAULT now();"
    echo "CREATE OR REPLACE PROCEDURE ipc_wait_for(state TEXT) AS \$\$
    DECLARE
    BEGIN
        RAISE NOTICE 'wait for state %', state;
        WHILE NOT EXISTS (SELECT 1 FROM ipc WHERE msg = state) LOOP
            RAISE NOTICE 'waiting until state %', state;
             EXECUTE 'SELECT pg_sleep(0.1);';
        END LOOP;
        RAISE NOTICE 'ok, reached state %', state;
    END;
    \$\$ LANGUAGE plpgsql;
"
) | psql -U postgres -p $port_sub -a

#!/bin/bash

tables=( 100 )

#tables=( 10 \
#100 \
#1000 \
#2000
#)

workers=( 4 )

#workers=( 2 \
#4 \
#8 \
#16
#)

prefix="PREFIX"

# For now, either of "0" or "10kB" could be accepted
size="0"
#size="10kB"

for j in "${workers[@]}"
do
    for i in "${tables[@]}"
    do
	for k in {1..5}
	do
	    echo -e "num_tables=${i}, size=${size}, num_workers=${j}, run #${k}"
	
	    # Run the pub and sub scripts in parallel, after doing the common setup
	    ./do_one_test_setup.sh $i ${size} $j $k $prefix &> ${prefix}_${i}_${size}_${j}_${k}.dat
	    #
	    ./do_one_test_PUB.sh $i ${size} $j $k &> ${prefix}_${i}_${size}_${j}_${k}.dat_PUB & 
#	    ./do_one_test_SUB_SYNCDONE.sh $i ${size} $j $k &> ${prefix}_${i}_${size}_${j}_${k}.dat_SUB_SYNCDONE &
	    ./do_one_test_SUB.sh $i ${size} $j $k &> ${prefix}_${i}_${size}_${j}_${k}.dat_SUB &
            wait
	done
    done
done

Reply via email to