On Thu, Jul 20, 2023 at 02:22:51PM -0500, Tristan Partin wrote: > Thanks for your testing Michael. I went ahead and added a test to make sure > that this behavior doesn't regress accidentally, but I am struggling to get > the test to fail using the previous version of this patch. Do you have any > advice? This is my first time writing a test for Postgres. I can recreate > the issue outside of the test script, but not within it for whatever reason.
We're all here to learn, and writing TAP tests is important these days for a lot of patches. +# Check that the pgbench_branches and pgbench_tellers filler columns are filled +# with NULLs +foreach my $table ('pgbench_branches', 'pgbench_tellers') { + my ($ret, $out, $err) = $node->psql( + 'postgres', + "SELECT COUNT(1) FROM $table; + SELECT COUNT(1) FROM $table WHERE filler is NULL;", + extra_params => ['--no-align', '--tuples-only']); + + is($ret, 0, "psql $table counts status is 0"); + is($err, '', "psql $table counts stderr is empty"); + if ($out =~ m/^(\d+)\n(\d+)$/g) { + is($1, $2, "psql $table filler column filled with NULLs"); + } else { + fail("psql $table stdout m/^(\\d+)\n(\\d+)$/g"); + } +} This is IMO hard to parse, and I'd rather add some checks for the accounts and history tables as well. Let's use four simple SQL queries like what I posted upthread (no data for history inserted after initialization), as of the attached. I'd be tempted to apply that first as a separate patch, because we've never added coverage for that and we have specific expectations in the code from this filler column for tpc-b. And this needs to cover both client-side and server-side data generation. Note that the indentation was a bit incorrect, so fixed while on it. Attached is a v7, with these tests (should be a patch on its own but I'm lazy to split this morning) and some more adjustments that I have done while going through the patch. What do you think? -- Michael
From 211d66fc39338d522a72722fc3674e306826fd37 Mon Sep 17 00:00:00 2001 From: Michael Paquier <mich...@paquier.xyz> Date: Fri, 21 Jul 2023 11:07:31 +0900 Subject: [PATCH v7] Use COPY instead of INSERT for populating all tables COPY is a better interface for bulk loading and/or high latency connections. Previously COPY was only used for the pgbench_accounts table because the amount of data was so much larger than the other tables. However, as already said there are also gains to be had in the high latency case, such as loading data across continents. --- src/bin/pgbench/pgbench.c | 155 +++++++++++-------- src/bin/pgbench/t/001_pgbench_with_server.pl | 35 +++++ doc/src/sgml/ref/pgbench.sgml | 9 +- 3 files changed, 133 insertions(+), 66 deletions(-) diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c index 087baa7d57..539c2795e2 100644 --- a/src/bin/pgbench/pgbench.c +++ b/src/bin/pgbench/pgbench.c @@ -835,6 +835,8 @@ static void add_socket_to_set(socket_set *sa, int fd, int idx); static int wait_on_socket_set(socket_set *sa, int64 usecs); static bool socket_has_input(socket_set *sa, int fd, int idx); +/* callback used to build rows for COPY during data loading */ +typedef void (*initRowMethod) (PQExpBufferData *sql, int64 curr); /* callback functions for our flex lexer */ static const PsqlScanCallbacks pgbench_callbacks = { @@ -4859,17 +4861,45 @@ initTruncateTables(PGconn *con) "pgbench_tellers"); } -/* - * Fill the standard tables with some data generated and sent from the client - */ static void -initGenerateDataClientSide(PGconn *con) +initBranch(PQExpBufferData *sql, int64 curr) { - PQExpBufferData sql; + /* "filler" column uses NULL */ + printfPQExpBuffer(sql, + INT64_FORMAT "\t0\t\\N\n", + curr + 1); +} + +static void +initTeller(PQExpBufferData *sql, int64 curr) +{ + /* "filler" column uses NULL */ + printfPQExpBuffer(sql, + INT64_FORMAT "\t" INT64_FORMAT "\t0\t\\N\n", + curr + 1, curr / ntellers + 1); +} + +static void 
+initAccount(PQExpBufferData *sql, int64 curr) +{ + /* "filler" column defaults to blank padded empty string */ + printfPQExpBuffer(sql, + INT64_FORMAT "\t" INT64_FORMAT "\t0\t\n", + curr + 1, curr / naccounts + 1); +} + +static void +initPopulateTable(PGconn *con, const char *table, int64 base, + initRowMethod init_row) +{ + int n; + int k; + int chars = 0; PGresult *res; - int i; - int64 k; - char *copy_statement; + PQExpBufferData sql; + char copy_statement[256]; + const char *copy_statement_fmt = "copy %s from stdin"; + int64 total = base * scale; /* used to track elapsed time and estimate of the remaining time */ pg_time_usec_t start; @@ -4878,50 +4908,24 @@ initGenerateDataClientSide(PGconn *con) /* Stay on the same line if reporting to a terminal */ char eol = isatty(fileno(stderr)) ? '\r' : '\n'; - fprintf(stderr, "generating data (client-side)...\n"); - - /* - * we do all of this in one transaction to enable the backend's - * data-loading optimizations - */ - executeStatement(con, "begin"); - - /* truncate away any old data */ - initTruncateTables(con); - initPQExpBuffer(&sql); /* - * fill branches, tellers, accounts in that order in case foreign keys - * already exist + * Use COPY with FREEZE on v14 and later for all the tables except + * pgbench_accounts when it is partitioned. 
*/ - for (i = 0; i < nbranches * scale; i++) + if (PQserverVersion(con) >= 140000) { - /* "filler" column defaults to NULL */ - printfPQExpBuffer(&sql, - "insert into pgbench_branches(bid,bbalance) values(%d,0)", - i + 1); - executeStatement(con, sql.data); + if (strcmp(table, "pgbench_accounts") != 0 || + partitions == 0) + copy_statement_fmt = "copy %s from stdin with (freeze on)"; } - for (i = 0; i < ntellers * scale; i++) - { - /* "filler" column defaults to NULL */ - printfPQExpBuffer(&sql, - "insert into pgbench_tellers(tid,bid,tbalance) values (%d,%d,0)", - i + 1, i / ntellers + 1); - executeStatement(con, sql.data); - } - - /* - * accounts is big enough to be worth using COPY and tracking runtime - */ - - /* use COPY with FREEZE on v14 and later without partitioning */ - if (partitions == 0 && PQserverVersion(con) >= 140000) - copy_statement = "copy pgbench_accounts from stdin with (freeze on)"; - else - copy_statement = "copy pgbench_accounts from stdin"; + n = pg_snprintf(copy_statement, sizeof(copy_statement), copy_statement_fmt, table); + if (n >= sizeof(copy_statement)) + pg_fatal("invalid buffer size: must be at least %d characters long", n); + else if (n == -1) + pg_fatal("invalid format string"); res = PQexec(con, copy_statement); @@ -4931,14 +4935,11 @@ initGenerateDataClientSide(PGconn *con) start = pg_time_now(); - for (k = 0; k < (int64) naccounts * scale; k++) + for (k = 0; k < total; k++) { int64 j = k + 1; - /* "filler" column defaults to blank padded empty string */ - printfPQExpBuffer(&sql, - INT64_FORMAT "\t" INT64_FORMAT "\t0\t\n", - j, k / naccounts + 1); + init_row(&sql, k); if (PQputline(con, sql.data)) pg_fatal("PQputline failed"); @@ -4952,25 +4953,26 @@ initGenerateDataClientSide(PGconn *con) if ((!use_quiet) && (j % 100000 == 0)) { double elapsed_sec = PG_TIME_GET_DOUBLE(pg_time_now() - start); - double remaining_sec = ((double) scale * naccounts - j) * elapsed_sec / j; + double remaining_sec = ((double) total - j) * elapsed_sec / 
j; - fprintf(stderr, INT64_FORMAT " of " INT64_FORMAT " tuples (%d%%) done (elapsed %.2f s, remaining %.2f s)%c", - j, (int64) naccounts * scale, - (int) (((int64) j * 100) / (naccounts * (int64) scale)), - elapsed_sec, remaining_sec, eol); + chars = fprintf(stderr, INT64_FORMAT " of " INT64_FORMAT " tuples (%d%%) of %s done (elapsed %.2f s, remaining %.2f s)%c", + j, total, + (int) ((j * 100) / total), + table, elapsed_sec, remaining_sec, eol); } /* let's not call the timing for each row, but only each 100 rows */ else if (use_quiet && (j % 100 == 0)) { double elapsed_sec = PG_TIME_GET_DOUBLE(pg_time_now() - start); - double remaining_sec = ((double) scale * naccounts - j) * elapsed_sec / j; + double remaining_sec = ((double) total - j) * elapsed_sec / j; /* have we reached the next interval (or end)? */ - if ((j == scale * naccounts) || (elapsed_sec >= log_interval * LOG_STEP_SECONDS)) + if ((j == total) || (elapsed_sec >= log_interval * LOG_STEP_SECONDS)) { - fprintf(stderr, INT64_FORMAT " of " INT64_FORMAT " tuples (%d%%) done (elapsed %.2f s, remaining %.2f s)%c", - j, (int64) naccounts * scale, - (int) (((int64) j * 100) / (naccounts * (int64) scale)), elapsed_sec, remaining_sec, eol); + chars = fprintf(stderr, INT64_FORMAT " of " INT64_FORMAT " tuples (%d%%) of %s done (elapsed %.2f s, remaining %.2f s)%c", + j, total, + (int) ((j * 100) / total), + table, elapsed_sec, remaining_sec, eol); /* skip to the next interval */ log_interval = (int) ceil(elapsed_sec / LOG_STEP_SECONDS); @@ -4978,8 +4980,8 @@ initGenerateDataClientSide(PGconn *con) } } - if (eol != '\n') - fputc('\n', stderr); /* Need to move to next line */ + if (chars != 0 && eol != '\n') + fprintf(stderr, "%*c\r", chars - 1, ' '); /* Clear the current line */ if (PQputline(con, "\\.\n")) pg_fatal("very last PQputline failed"); @@ -4987,6 +4989,35 @@ initGenerateDataClientSide(PGconn *con) pg_fatal("PQendcopy failed"); termPQExpBuffer(&sql); +} + +/* + * Fill the standard tables with some data 
generated and sent from the client. + * + * The filler column is NULL in pgbench_branches and pgbench_tellers, and is + * a blank-padded string in pgbench_accounts. + */ +static void +initGenerateDataClientSide(PGconn *con) +{ + fprintf(stderr, "generating data (client-side)...\n"); + + /* + * we do all of this in one transaction to enable the backend's + * data-loading optimizations + */ + executeStatement(con, "begin"); + + /* truncate away any old data */ + initTruncateTables(con); + + /* + * fill branches, tellers, accounts in that order in case foreign keys + * already exist + */ + initPopulateTable(con, "pgbench_branches", nbranches, initBranch); + initPopulateTable(con, "pgbench_tellers", ntellers, initTeller); + initPopulateTable(con, "pgbench_accounts", naccounts, initAccount); executeStatement(con, "commit"); } diff --git a/src/bin/pgbench/t/001_pgbench_with_server.pl b/src/bin/pgbench/t/001_pgbench_with_server.pl index f8ca8a922d..5d0745b942 100644 --- a/src/bin/pgbench/t/001_pgbench_with_server.pl +++ b/src/bin/pgbench/t/001_pgbench_with_server.pl @@ -8,6 +8,35 @@ use PostgreSQL::Test::Cluster; use PostgreSQL::Test::Utils; use Test::More; +# Check the initial state of the data generated. Tables for tellers and +# branches use NULL for their filler attribute. The table accounts uses +# a non-NULL filler. The history table should have no data. 
+sub check_data_state +{ + local $Test::Builder::Level = $Test::Builder::Level + 1; + my $node = shift; + my $type = shift; + + my $sql_result = $node->safe_psql('postgres', + 'SELECT count(*) AS null_count FROM pgbench_accounts WHERE filler IS NULL;' + ); + is($sql_result, '0', + "$type: filler column of pgbench_accounts has no NULL data"); + $sql_result = $node->safe_psql('postgres', + 'SELECT count(*) AS null_count FROM pgbench_branches WHERE filler IS NULL;' + ); + is($sql_result, '1', + "$type: filler column of pgbench_branches has only NULL data"); + $sql_result = $node->safe_psql('postgres', + 'SELECT count(*) AS null_count FROM pgbench_tellers WHERE filler IS NULL;' + ); + is($sql_result, '10', + "$type: filler column of pgbench_tellers has only NULL data"); + $sql_result = $node->safe_psql('postgres', + 'SELECT count(*) AS data_count FROM pgbench_history;'); + is($sql_result, '0', "$type: pgbench_history has no data"); +} + # start a pgbench specific server my $node = PostgreSQL::Test::Cluster->new('main'); # Set to untranslated messages, to be able to compare program output with @@ -67,6 +96,9 @@ $node->pgbench( ], 'pgbench scale 1 initialization',); +# Check data state, after client-side data generation. +check_data_state($node, 'client-side'); + # Again, with all possible options $node->pgbench( '--initialize --init-steps=dtpvg --scale=1 --unlogged-tables --fillfactor=98 --foreign-keys --quiet --tablespace=regress_pgbench_tap_1_ts --index-tablespace=regress_pgbench_tap_1_ts --partitions=2 --partition-method=hash', @@ -101,6 +133,9 @@ $node->pgbench( ], 'pgbench --init-steps'); +# Check data state, after server-side data generation. 
+check_data_state($node, 'server-side'); + # Run all builtin scripts, for a few transactions each $node->pgbench( '--transactions=5 -Dfoo=bla --client=2 --protocol=simple --builtin=t' diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml index 850028557d..ccec09b757 100644 --- a/doc/src/sgml/ref/pgbench.sgml +++ b/doc/src/sgml/ref/pgbench.sgml @@ -231,10 +231,11 @@ pgbench <optional> <replaceable>options</replaceable> </optional> <replaceable>d extensively through a <command>COPY</command>. <command>pgbench</command> uses the FREEZE option with version 14 or later of <productname>PostgreSQL</productname> to speed up - subsequent <command>VACUUM</command>, unless partitions are enabled. - Using <literal>g</literal> causes logging to print one message - every 100,000 rows while generating data for the - <structname>pgbench_accounts</structname> table. + subsequent <command>VACUUM</command>. If partitions are enabled for + the <literal>pgbench_accounts</literal> table, the FREEZE option is + not enabled on it. Using <literal>g</literal> causes logging to + print one message every 100,000 rows while generating data for all + tables. </para> <para> With <literal>G</literal> (server-side data generation), -- 2.40.1
signature.asc
Description: PGP signature