Hello Masao-san,
+ snprintf(sql, sizeof(sql), + "insert into pgbench_branches(bid,bbalance) " + "select bid, 0 " + "from generate_series(1, %d) as bid", scale); "scale" should be "nbranches * scale".
Yep, even if nbranches is 1, it should be there.
+ snprintf(sql, sizeof(sql), + "insert into pgbench_accounts(aid,bid,abalance,filler) " + "select aid, (aid - 1) / %d + 1, 0, '' " + "from generate_series(1, %d) as aid", naccounts, scale * naccounts); Like client-side data generation, INT64_FORMAT should be used here instead of %d?
Indeed.
If large scale factor is specified, the query for generating pgbench_accounts data can take a very long time. While that query is running, operators may be likely to do Ctrl-C to cancel the data generation. In this case, IMO pgbench should cancel the query, i.e., call PQcancel(). Otherwise, the query will keep running to the end.
Hmmm. Why not. Now the infra to do that seems to already exists twice, once in "src/bin/psql/common.c" and once in "src/bin/scripts/common.c".
I cannot say I'm thrilled to replicate this once more. I think that the reasonable option is to share this in fe-utils and then to reuse it from there. However, ISTM that such a restructuring patch which not belong to this feature.
- for (step = initialize_steps; *step != '\0'; step++) + for (const char *step = initialize_steps; *step != '\0'; step++) Per PostgreSQL basic coding style,
C99 (20 years ago) is new the norm, and this style is now allowed, there are over a hundred instances of these already. I tend to use that where
appropriate.
- fprintf(stderr, "unrecognized initialization step \"%c\"\n", + fprintf(stderr, + "unrecognized initialization step \"%c\"\n" + "Allowed step characters are: \"" ALL_INIT_STEPS "\".\n", *step); - fprintf(stderr, "allowed steps are: \"d\", \"t\", \"g\", \"v\", \"p\", \"f\"\n"); The original message seems better to me. So what about just appending "G" into the above latter message? That is, "allowed steps are: \"d\", \"t\", \"g\", \"G\", \"v\", \"p\", \"f\"\n"
I needed this list in several places, so it makes sense to share the definition, and frankly the list of half a dozen comma-separated chars does not strike me as much better than just giving the allowed chars directly. So the simpler the better, from my point of view.
Isn't it better to explain a bit more what "client-side / server-side data generation" is? For example, something like
Ok.Attached v7 does most of the above, but the list of char message and the signal handling. The first one does not look really better to me, and the second one belongs to a restructuring patch that I'll try to submit.
-- Fabien.
diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml index e3a0abb4c7..7be9c81c43 100644 --- a/doc/src/sgml/ref/pgbench.sgml +++ b/doc/src/sgml/ref/pgbench.sgml @@ -193,12 +193,25 @@ pgbench <optional> <replaceable>options</replaceable> </optional> <replaceable>d </listitem> </varlistentry> <varlistentry> - <term><literal>g</literal> (Generate data)</term> + <term><literal>g</literal> or <literal>G</literal> (Generate data, client-side or server-side)</term> <listitem> <para> Generate data and load it into the standard tables, replacing any data already present. </para> + <para> + With <literal>g</literal> (client-side data generation), + data is generated in <command>pgbench</command> client and then + sent to the server. This uses the client/server bandwidth + extensively through a <command>COPY</command>. + </para> + <para> + With <literal>G</literal> (server-side data generation), + only limited queries are sent from <command>pgbench</command> + client and then data is actually generated in the server. + No significant bandwidth is required for this variant, but + the server will do more work. + </para> </listitem> </varlistentry> <varlistentry> diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c index 03bcd22996..7f5c1d00c8 100644 --- a/src/bin/pgbench/pgbench.c +++ b/src/bin/pgbench/pgbench.c @@ -131,8 +131,14 @@ static int pthread_join(pthread_t th, void **thread_return); /******************************************************************** * some configurable parameters */ +/* + * we do all data generation in one transaction to enable the backend's + * data-loading optimizations + */ #define DEFAULT_INIT_STEPS "dtgvp" /* default -I setting */ +#define ALL_INIT_STEPS "dtgGvpf" /* all possible steps */ + #define LOG_STEP_SECONDS 5 /* seconds between log messages */ #define DEFAULT_NXACTS 10 /* default nxacts */ @@ -627,7 +633,7 @@ usage(void) " %s [OPTION]... [DBNAME]\n" "\nInitialization options:\n" " -i, --initialize invokes initialization mode\n" - " -I, --init-steps=[dtgvpf]+ (default \"dtgvp\")\n" + " -I, --init-steps=[" ALL_INIT_STEPS "]+ (default \"" DEFAULT_INIT_STEPS "\")\n" " run selected initialization steps\n" " -F, --fillfactor=NUM set fill factor\n" " -n, --no-vacuum do not run VACUUM during initialization\n" @@ -3803,10 +3809,23 @@ append_fillfactor(char *opts, int len) } /* - * Fill the standard tables with some data + * truncate away any old data, in one command in case there are foreign keys */ static void -initGenerateData(PGconn *con) +initTruncateTables(PGconn *con) +{ + executeStatement(con, "truncate table " + "pgbench_accounts, " + "pgbench_branches, " + "pgbench_history, " + "pgbench_tellers"); +} + +/* + * Fill the standard tables with some data, from the client side + */ +static void +initGenerateDataClientSide(PGconn *con) { char sql[256]; PGresult *res; @@ -3820,7 +3839,7 @@ initGenerateData(PGconn *con) remaining_sec; int log_interval = 1; - fprintf(stderr, "generating data...\n"); + fprintf(stderr, "generating data client side...\n"); /* * we do all of this in one transaction to enable the backend's @@ -3828,15 +3847,7 @@ initGenerateData(PGconn *con) */ executeStatement(con, "begin"); - /* - * truncate away any old data, in one command in case there are foreign - * keys - */ - executeStatement(con, "truncate table " - "pgbench_accounts, " - "pgbench_branches, " - "pgbench_history, " - "pgbench_tellers"); + initTruncateTables(con); /* * fill branches, tellers, accounts in that order in case foreign keys @@ -3940,6 +3951,49 @@ initGenerateData(PGconn *con) executeStatement(con, "commit"); } +/* + * Fill the standard tables with some data, from the server side + * + * As already the case with the client-side filling, the filler + * column defaults to NULL in pgbench_branches and pgbench_tellers, + * and is a blank-padded string in pgbench_accounts. + */ +static void +initGenerateDataServerSide(PGconn *con) +{ + char sql[256]; + + fprintf(stderr, "generating data server side...\n"); + + /* + * we do all of this in one transaction to enable the backend's + * data-loading optimizations + */ + executeStatement(con, "begin"); + + initTruncateTables(con); + + snprintf(sql, sizeof(sql), + "insert into pgbench_branches(bid,bbalance) " + "select bid, 0 " + "from generate_series(1, %d) as bid", nbranches * scale); + executeStatement(con, sql); + + snprintf(sql, sizeof(sql), + "insert into pgbench_tellers(tid,bid,tbalance) " + "select tid, (tid - 1) / %d + 1, 0 " + "from generate_series(1, %d) as tid", ntellers, ntellers * scale); + executeStatement(con, sql); + + snprintf(sql, sizeof(sql), + "insert into pgbench_accounts(aid,bid,abalance,filler) " + "select aid, (aid - 1) / %d + 1, 0, '' " + "from generate_series(1, "INT64_FORMAT") as aid", naccounts, (int64) naccounts * scale); + executeStatement(con, sql); + + executeStatement(con, "commit"); +} + /* * Invoke vacuum on the standard tables */ @@ -4020,21 +4074,20 @@ initCreateFKeys(PGconn *con) static void checkInitSteps(const char *initialize_steps) { - const char *step; - if (initialize_steps[0] == '\0') { fprintf(stderr, "no initialization steps specified\n"); exit(1); } - for (step = initialize_steps; *step != '\0'; step++) + for (const char *step = initialize_steps; *step != '\0'; step++) { - if (strchr("dtgvpf ", *step) == NULL) + if (strchr(ALL_INIT_STEPS " ", *step) == NULL) { - fprintf(stderr, "unrecognized initialization step \"%c\"\n", + fprintf(stderr, + "unrecognized initialization step \"%c\"\n" + "Allowed step characters are: \"" ALL_INIT_STEPS "\".\n", *step); - fprintf(stderr, "allowed steps are: \"d\", \"t\", \"g\", \"v\", \"p\", \"f\"\n"); exit(1); } } @@ -4075,8 +4128,12 @@ runInitSteps(const char *initialize_steps) initCreateTables(con); break; case 'g': - op = "generate"; - initGenerateData(con); + op = "client generate"; + initGenerateDataClientSide(con); + break; + case 'G': + op = "server generate"; + initGenerateDataServerSide(con); break; case 'v': op = "vacuum"; diff --git a/src/bin/pgbench/t/001_pgbench_with_server.pl b/src/bin/pgbench/t/001_pgbench_with_server.pl index c441626d7c..4384d0072a 100644 --- a/src/bin/pgbench/t/001_pgbench_with_server.pl +++ b/src/bin/pgbench/t/001_pgbench_with_server.pl @@ -130,7 +130,7 @@ pgbench( # Test interaction of --init-steps with legacy step-selection options pgbench( - '--initialize --init-steps=dtpvgvv --no-vacuum --foreign-keys --unlogged-tables --partitions=3', + '--initialize --init-steps=dtpvGvv --no-vacuum --foreign-keys --unlogged-tables --partitions=3', 0, [qr{^$}], [ @@ -138,7 +138,7 @@ pgbench( qr{creating tables}, qr{creating 3 partitions}, qr{creating primary keys}, - qr{.* of .* tuples \(.*\) done}, + qr{generating data server side}, qr{creating foreign keys}, qr{(?!vacuuming)}, # no vacuum qr{done in \d+\.\d\d s } diff --git a/src/bin/pgbench/t/002_pgbench_no_server.pl b/src/bin/pgbench/t/002_pgbench_no_server.pl index 1e9542af3f..8b6d442812 100644 --- a/src/bin/pgbench/t/002_pgbench_no_server.pl +++ b/src/bin/pgbench/t/002_pgbench_no_server.pl @@ -147,7 +147,7 @@ my @options = ( [ 'invalid init step', '-i -I dta', - [ qr{unrecognized initialization step}, qr{allowed steps are} ] + [ qr{unrecognized initialization step}, qr{Allowed step characters are} ] ], [ 'bad random seed',