On Wed Jul 12, 2023 at 10:52 PM CDT, Michael Paquier wrote:
> On Wed, Jul 12, 2023 at 09:29:35AM -0500, Tristan Partin wrote:
>> On Wed Jul 12, 2023 at 1:06 AM CDT, Michael Paquier wrote:
>>> This would use the freeze option only on pgbench_accounts when no
>>> partitioning is defined, but my point was a bit different. We could
>>> use the FREEZE option on the teller and branch tables as well, no?
>>> Okay, the impact is limited compared to accounts in terms of amount of
>>> data loaded, but perhaps some people like playing with large scaling
>>> factors where this could show a benefit in the initial data loading.
>>
>> Perhaps. Should they all be keyed off the same option? It seemed like
>> in your previous comment you wanted multiple options. Sorry for not
>> reading your comment correctly.
> I would have thought that --partition should only apply to the
> pgbench_accounts table, while FREEZE should apply where it is possible
> to use it, aka all the COPY queries except when pgbench_accounts is a
> partition. Would you do something different, like not applying FREEZE
> to pgbench_tellers and pgbench_branches as these have far fewer tuples
> than pgbench_accounts?
Michael,

I think I completely misunderstood you. From what I can tell, you want
to use FREEZE wherever possible, i.e. on every table except a
partitioned pgbench_accounts. I think the new patch covers your comment
and fixes the documentation. Hopefully this is finally what you were
looking for.
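
To make that concrete, here is a minimal sketch (not code from the
patch; the function name is made up for illustration) of the rule the
patch applies when picking the COPY statement:

    #include <stdbool.h>
    #include <string.h>

    /*
     * FREEZE needs a v14+ server, and COPY FREEZE is rejected outright
     * for partitioned tables. pgbench only ever partitions
     * pgbench_accounts, so that is the one table which may have to fall
     * back to a plain COPY.
     */
    static bool
    can_use_freeze(int server_version, int partitions, const char *table)
    {
        if (server_version < 140000)
            return false;
        if (partitions > 0 && strcmp(table, "pgbench_accounts") == 0)
            return false;
        return true;
    }

So with --partitions=0, all three tables are loaded with FREEZE on a
v14+ server; with partitioning enabled, only pgbench_accounts loses it.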
--
Tristan Partin
Neon (https://neon.tech)
From 6fcc685fad9640818da7c8c5ff8a10d3fcf1aaed Mon Sep 17 00:00:00 2001
From: Tristan Partin <tris...@neon.tech>
Date: Tue, 23 May 2023 11:48:16 -0500
Subject: [PATCH v4] Use COPY instead of INSERT for populating all tables
COPY is a better interface for bulk loading and for high-latency
connections. Previously, COPY was used only for the pgbench_accounts
table, since it holds so much more data than the other tables. However,
there are gains to be had in the high-latency case too, such as when
loading data across continents, so use COPY for pgbench_branches and
pgbench_tellers as well.
---
src/bin/pgbench/pgbench.c | 155 ++++++++++++++++++++++----------------
1 file changed, 90 insertions(+), 65 deletions(-)
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index 087baa7d57..6db1500151 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -4859,17 +4859,46 @@ initTruncateTables(PGconn *con)
"pgbench_tellers");
}
-/*
- * Fill the standard tables with some data generated and sent from the client
- */
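+/*
+ * A row generator writes the COPY text-format data line for row "curr"
+ * (zero-based) of its table into "sql".
+ */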
+typedef void initRow(PQExpBufferData *sql, int64 curr);
+
static void
-initGenerateDataClientSide(PGconn *con)
+initBranch(PQExpBufferData *sql, int64 curr)
+{
+ /* "filler" column defaults to NULL */
+ printfPQExpBuffer(sql,
+ INT64_FORMAT "\t0\t\n",
+ curr + 1);
+}
+
+static void
+initTeller(PQExpBufferData *sql, int64 curr)
+{
+ /* "filler" column defaults to NULL */
+ printfPQExpBuffer(sql,
+ INT64_FORMAT "\t" INT64_FORMAT "\t0\t\n",
+ curr + 1, curr / ntellers + 1);
+}
+
+static void
+initAccount(PQExpBufferData *sql, int64 curr)
+{
+ /* "filler" column defaults to blank padded empty string */
+ printfPQExpBuffer(sql,
+ INT64_FORMAT "\t" INT64_FORMAT "\t0\t\n",
+ curr + 1, curr / naccounts + 1);
+}
+
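+/*
+ * Fill one standard table through COPY, generating each data line with
+ * init_row. "base" is the per-scale-unit row count, so the table ends up
+ * with base * scale rows.
+ */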
+static void
+initPopulateTable(PGconn *con, const char *table, int64 base, initRow *init_row)
{
+ int n;
+ int64 k;
+ int chars = 0;
+ PGresult *res;
PQExpBufferData sql;
- PGresult *res;
- int i;
- int64 k;
- char *copy_statement;
+ char copy_statement[256];
+ const char *copy_statement_fmt;
+ const int64 total = base * scale;
/* used to track elapsed time and estimate of the remaining time */
pg_time_usec_t start;
@@ -4878,50 +4907,22 @@ initGenerateDataClientSide(PGconn *con)
/* Stay on the same line if reporting to a terminal */
char eol = isatty(fileno(stderr)) ? '\r' : '\n';
- fprintf(stderr, "generating data (client-side)...\n");
-
- /*
- * we do all of this in one transaction to enable the backend's
- * data-loading optimizations
- */
- executeStatement(con, "begin");
-
- /* truncate away any old data */
- initTruncateTables(con);
-
initPQExpBuffer(&sql);
/*
- * fill branches, tellers, accounts in that order in case foreign keys
- * already exist
+ * Use COPY with FREEZE on v14 and later, except when the table is
+ * partitioned; COPY FREEZE does not support partitioned tables, and
+ * only pgbench_accounts can be partitioned.
*/
- for (i = 0; i < nbranches * scale; i++)
- {
- /* "filler" column defaults to NULL */
- printfPQExpBuffer(&sql,
- "insert into pgbench_branches(bid,bbalance) values(%d,0)",
- i + 1);
- executeStatement(con, sql.data);
- }
-
- for (i = 0; i < ntellers * scale; i++)
- {
- /* "filler" column defaults to NULL */
- printfPQExpBuffer(&sql,
- "insert into pgbench_tellers(tid,bid,tbalance) values (%d,%d,0)",
- i + 1, i / ntellers + 1);
- executeStatement(con, sql.data);
- }
-
- /*
- * accounts is big enough to be worth using COPY and tracking runtime
- */
-
- /* use COPY with FREEZE on v14 and later without partitioning */
- if (partitions == 0 && PQserverVersion(con) >= 140000)
- copy_statement = "copy pgbench_accounts from stdin with (freeze on)";
+ if ((partitions == 0 || strcmp(table, "pgbench_accounts") != 0) &&
+ PQserverVersion(con) >= 140000)
+ copy_statement_fmt = "copy %s from stdin with (freeze on)";
else
- copy_statement = "copy pgbench_accounts from stdin";
+ copy_statement_fmt = "copy %s from stdin";
+
+ n = pg_snprintf(copy_statement, sizeof(copy_statement), copy_statement_fmt, table);
+ /* test for error first: signed n would be promoted for the unsigned comparison */
+ if (n < 0)
+ pg_fatal("invalid format string");
+ else if ((size_t) n >= sizeof(copy_statement))
+ pg_fatal("invalid buffer size: must be at least %d characters long", n);
res = PQexec(con, copy_statement);
@@ -4931,14 +4932,11 @@ initGenerateDataClientSide(PGconn *con)
start = pg_time_now();
- for (k = 0; k < (int64) naccounts * scale; k++)
+ for (k = 0; k < total; k++)
{
- int64 j = k + 1;
+ const int64 j = k + 1;
- /* "filler" column defaults to blank padded empty string */
- printfPQExpBuffer(&sql,
- INT64_FORMAT "\t" INT64_FORMAT "\t0\t\n",
- j, k / naccounts + 1);
+ init_row(&sql, k);
if (PQputline(con, sql.data))
pg_fatal("PQputline failed");
@@ -4952,25 +4950,26 @@ initGenerateDataClientSide(PGconn *con)
if ((!use_quiet) && (j % 100000 == 0))
{
double elapsed_sec = PG_TIME_GET_DOUBLE(pg_time_now() - start);
- double remaining_sec = ((double) scale * naccounts - j) * elapsed_sec / j;
+ double remaining_sec = ((double) total - j) * elapsed_sec / j;
- fprintf(stderr, INT64_FORMAT " of " INT64_FORMAT " tuples (%d%%) done (elapsed %.2f s, remaining %.2f s)%c",
- j, (int64) naccounts * scale,
- (int) (((int64) j * 100) / (naccounts * (int64) scale)),
- elapsed_sec, remaining_sec, eol);
+ chars = fprintf(stderr, INT64_FORMAT " of " INT64_FORMAT " tuples (%d%%) of %s done (elapsed %.2f s, remaining %.2f s)%c",
+ j, total,
+ (int) ((j * 100) / total),
+ table, elapsed_sec, remaining_sec, eol);
}
/* let's not call the timing for each row, but only each 100 rows */
else if (use_quiet && (j % 100 == 0))
{
double elapsed_sec = PG_TIME_GET_DOUBLE(pg_time_now() - start);
- double remaining_sec = ((double) scale * naccounts - j) * elapsed_sec / j;
+ double remaining_sec = ((double) total - j) * elapsed_sec / j;
/* have we reached the next interval (or end)? */
- if ((j == scale * naccounts) || (elapsed_sec >= log_interval * LOG_STEP_SECONDS))
+ if ((j == total) || (elapsed_sec >= log_interval * LOG_STEP_SECONDS))
{
- fprintf(stderr, INT64_FORMAT " of " INT64_FORMAT " tuples (%d%%) done (elapsed %.2f s, remaining %.2f s)%c",
- j, (int64) naccounts * scale,
- (int) (((int64) j * 100) / (naccounts * (int64) scale)), elapsed_sec, remaining_sec, eol);
+ chars = fprintf(stderr, INT64_FORMAT " of " INT64_FORMAT " tuples (%d%%) of %s done (elapsed %.2f s, remaining %.2f s)%c",
+ j, total,
+ (int) ((j * 100) / total),
+ table, elapsed_sec, remaining_sec, eol);
/* skip to the next interval */
log_interval = (int) ceil(elapsed_sec / LOG_STEP_SECONDS);
@@ -4978,8 +4977,8 @@ initGenerateDataClientSide(PGconn *con)
}
}
- if (eol != '\n')
- fputc('\n', stderr); /* Need to move to next line */
+ if (chars != 0 && eol != '\n')
+ fprintf(stderr, "%*c\r", chars - 1, ' '); /* Clear the current line */
if (PQputline(con, "\\.\n"))
pg_fatal("very last PQputline failed");
@@ -4987,6 +4986,32 @@ initGenerateDataClientSide(PGconn *con)
pg_fatal("PQendcopy failed");
termPQExpBuffer(&sql);
+}
+
+/*
+ * Fill the standard tables with some data generated and sent from the client
+ */
+static void
+initGenerateDataClientSide(PGconn *con)
+{
+ fprintf(stderr, "generating data (client-side)...\n");
+
+ /*
+ * we do all of this in one transaction to enable the backend's
+ * data-loading optimizations
+ */
+ executeStatement(con, "begin");
+
+ /* truncate away any old data */
+ initTruncateTables(con);
+
+ /*
+ * fill branches, tellers, accounts in that order in case foreign keys
+ * already exist
+ */
+ initPopulateTable(con, "pgbench_branches", nbranches, initBranch);
+ initPopulateTable(con, "pgbench_tellers", ntellers, initTeller);
+ initPopulateTable(con, "pgbench_accounts", naccounts, initAccount);
executeStatement(con, "commit");
}
--
Tristan Partin
Neon (https://neon.tech)