Hello,
Failed regression test. It's necessary to change the first a in “allowed step characters are” to uppercase A in the regression test of 002_pgbench_no_server.pl.
Argh. I think I ran the test, then stupidly updated the message afterwards to better match best practices, without rechecking:-(
The behavior of "g" is different between v12 and the patche, and backward compatibility is lost. In v12, BEGIN and COMMIT are specified only by choosing "g". It's a problem that backward compatibility is lost.
Somehow yes, but I do not see this as an actual problem from a functional point of view: it just means that if you use a 'dtgvp' with the newer version and if the inserts were to fail, then they are not under an explicit transaction, so previous inserts are not cleaned up. However, this is a pretty unlikely case, and anyway the error is reported, so any user would be expected not to go on after the initialization phase.
So basically I do not see the very small regression for an unlikely corner case to induce any problem in practice.
The benefit of controlling where begin/end actually occur is that it may have an impact on performance, and it allows to check that.
When using ( and ) with the -I, the documentation should indicate that double quotes are required,
Or single quotes, or backslash, if launch from the command line. I added a mention of escaping or protection in the doc in that case.
and "v" not be able to enclose in ( and ).
That is a postgresql limitation, which may evolve. There could be others. I updated the doc to say that some commands may not work inside an explicit transaction.
When g is specified, null is inserted in the filler column of pgbentch_tellrs, acounts, branches. But when G is specified, empty string is inserted.
Indeed there is a small diff. ISTM that the actual filling with the initial client version is NULL for branches and tellers, and a blank-padded string for accounts.
I fixed the patch so that the end-result is the same with both g and G.
Do you have any intention of this difference?
Yes and no.I intended that tellers & branches filler are filled, but I did not really notice that the client side was implicitely using NULL, although it says so in a comment. Although I'm not happy with the fact because it cheats with the benchmark design which requires the filler columns to be really filled and stored as is, it is indeed the place to change this (bad) behavior.
Attached a v4 with the updates described above. -- Fabien.
diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml index e3a0abb4c7..c5e4d17fd5 100644 --- a/doc/src/sgml/ref/pgbench.sgml +++ b/doc/src/sgml/ref/pgbench.sgml @@ -167,7 +167,7 @@ pgbench <optional> <replaceable>options</replaceable> </optional> <replaceable>d <replaceable>init_steps</replaceable> specifies the initialization steps to be performed, using one character per step. Each step is invoked in the specified order. - The default is <literal>dtgvp</literal>. + The default is <literal>dt(g)vp</literal>. The available steps are: <variablelist> @@ -193,12 +193,40 @@ pgbench <optional> <replaceable>options</replaceable> </optional> <replaceable>d </listitem> </varlistentry> <varlistentry> - <term><literal>g</literal> (Generate data)</term> + <term><literal>(</literal> (begin transaction)</term> + <listitem> + <para> + Emit a <command>BEGIN</command>. + </para> + <para> + Beware that some steps may not work when called within an explicit transaction. + </para> + <para> + Beware that using <literal>(</literal> on the command line requires some protection or escaping. + </para> + </listitem> + </varlistentry> + <varlistentry> + <term><literal>g</literal> or <literal>G</literal> (Generate data, client or server side)</term> <listitem> <para> Generate data and load it into the standard tables, replacing any data already present. </para> + <para> + When data is generated server side, there is no progress output. + </para> + </listitem> + </varlistentry> + <varlistentry> + <term><literal>)</literal> (commit transaction)</term> + <listitem> + <para> + Emit a <command>COMMIT</command>. + </para> + <para> + Beware that using <literal>)</literal> on the command line requires some protection or escaping. + </para> </listitem> </varlistentry> <varlistentry> diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c index e72ad0036e..597562248a 100644 --- a/src/bin/pgbench/pgbench.c +++ b/src/bin/pgbench/pgbench.c @@ -130,7 +130,13 @@ static int pthread_join(pthread_t th, void **thread_return); /******************************************************************** * some configurable parameters */ -#define DEFAULT_INIT_STEPS "dtgvp" /* default -I setting */ +/* + * we do all data generation in one transaction to enable the backend's + * data-loading optimizations + */ +#define DEFAULT_INIT_STEPS "dt(g)vp" /* default -I setting */ + +#define ALL_INIT_STEPS "dtgGvpf()" /* all possible steps */ #define LOG_STEP_SECONDS 5 /* seconds between log messages */ #define DEFAULT_NXACTS 10 /* default nxacts */ @@ -626,7 +632,7 @@ usage(void) " %s [OPTION]... [DBNAME]\n" "\nInitialization options:\n" " -i, --initialize invokes initialization mode\n" - " -I, --init-steps=[dtgvpf]+ (default \"dtgvp\")\n" + " -I, --init-steps=[" ALL_INIT_STEPS "]+ (default \"" DEFAULT_INIT_STEPS "\")\n" " run selected initialization steps\n" " -F, --fillfactor=NUM set fill factor\n" " -n, --no-vacuum do not run VACUUM during initialization\n" @@ -3802,10 +3808,23 @@ append_fillfactor(char *opts, int len) } /* - * Fill the standard tables with some data + * truncate away any old data, in one command in case there are foreign keys */ static void -initGenerateData(PGconn *con) +initTruncateTables(PGconn *con) +{ + executeStatement(con, "truncate table " + "pgbench_accounts, " + "pgbench_branches, " + "pgbench_history, " + "pgbench_tellers"); +} + +/* + * Fill the standard tables with some data, from the client side + */ +static void +initGenerateDataClientSide(PGconn *con) { char sql[256]; PGresult *res; @@ -3819,23 +3838,9 @@ initGenerateData(PGconn *con) remaining_sec; int log_interval = 1; - fprintf(stderr, "generating data...\n"); + fprintf(stderr, "generating data client side...\n"); - /* - * we do all of this in one transaction to enable the backend's - * data-loading optimizations - */ - executeStatement(con, "begin"); - - /* - * truncate away any old data, in one command in case there are foreign - * keys - */ - executeStatement(con, "truncate table " - "pgbench_accounts, " - "pgbench_branches, " - "pgbench_history, " - "pgbench_tellers"); + initTruncateTables(con); /* * fill branches, tellers, accounts in that order in case foreign keys @@ -3935,8 +3940,41 @@ initGenerateData(PGconn *con) fprintf(stderr, "PQendcopy failed\n"); exit(1); } +} - executeStatement(con, "commit"); +/* + * Fill the standard tables with some data, from the server side + * + * As already the case with the client-side filling, the filler + * column defaults to NULL in pgbench_branches and pgbench_tellers, + * and is a blank-padded string in pgbench_accounts. + */ +static void +initGenerateDataServerSide(PGconn *con) +{ + char sql[256]; + + fprintf(stderr, "generating data server side...\n"); + + initTruncateTables(con); + + snprintf(sql, sizeof(sql), + "insert into pgbench_branches(bid,bbalance) " + "select bid, 0 " + "from generate_series(1, %d) as bid", scale); + executeStatement(con, sql); + + snprintf(sql, sizeof(sql), + "insert into pgbench_tellers(tid,bid,tbalance) " + "select tid, (tid - 1) / %d + 1, 0 " + "from generate_series(1, %d) as tid", ntellers, scale * ntellers); + executeStatement(con, sql); + + snprintf(sql, sizeof(sql), + "insert into pgbench_accounts(aid,bid,abalance,filler) " + "select aid, (aid - 1) / %d + 1, 0, '' " + "from generate_series(1, %d) as aid", naccounts, scale * naccounts); + executeStatement(con, sql); } /* @@ -4020,6 +4058,7 @@ static void checkInitSteps(const char *initialize_steps) { const char *step; + int begins = 0; if (initialize_steps[0] == '\0') { @@ -4029,13 +4068,40 @@ checkInitSteps(const char *initialize_steps) for (step = initialize_steps; *step != '\0'; step++) { - if (strchr("dtgvpf ", *step) == NULL) + if (strchr(ALL_INIT_STEPS " ", *step) == NULL) { - fprintf(stderr, "unrecognized initialization step \"%c\"\n", + fprintf(stderr, + "unrecognized initialization step \"%c\"\n" + "Allowed step characters are: \"" ALL_INIT_STEPS "\".\n", *step); - fprintf(stderr, "allowed steps are: \"d\", \"t\", \"g\", \"v\", \"p\", \"f\"\n"); exit(1); } + + /* count BEGIN/COMMIT for matching */ + if (*step == '(') + begins++; + else if (*step == ')') + begins--; + + if (begins < 0) + { + fprintf(stderr, "COMMIT \")\" at char %ld of \"%s\" does not have a matching BEGIN \"(\"\n", + step - initialize_steps, initialize_steps); + exit(1); + } + else if (begins >= 2) + { + fprintf(stderr, "BEGIN \"(\" within a BEGIN at char %ld of \"%s\", nested transactions are not supported\n", + step - initialize_steps, initialize_steps); + exit(1); + } + } + + if (begins > 0) + { + fprintf(stderr, "a BEGIN \"(\" in \"%s\" does not have a matching COMMIT \")\"\n", + initialize_steps); + exit(1); } } @@ -4073,9 +4139,20 @@ runInitSteps(const char *initialize_steps) op = "create tables"; initCreateTables(con); break; + case '(': + executeStatement(con, "begin"); + break; case 'g': - op = "generate"; - initGenerateData(con); + op = "client generate"; + initGenerateDataClientSide(con); + break; + case 'G': + op = "server generate"; + initGenerateDataServerSide(con); + break; + case ')': + op = "commit"; + executeStatement(con, "commit"); break; case 'v': op = "vacuum"; diff --git a/src/bin/pgbench/t/001_pgbench_with_server.pl b/src/bin/pgbench/t/001_pgbench_with_server.pl index c441626d7c..ddfdc79efa 100644 --- a/src/bin/pgbench/t/001_pgbench_with_server.pl +++ b/src/bin/pgbench/t/001_pgbench_with_server.pl @@ -113,7 +113,7 @@ pgbench( # Again, with all possible options pgbench( - '--initialize --init-steps=dtpvg --scale=1 --unlogged-tables --fillfactor=98 --foreign-keys --quiet --tablespace=regress_pgbench_tap_1_ts --index-tablespace=regress_pgbench_tap_1_ts --partitions=2 --partition-method=hash', + '--initialize --init-steps=dtpv(g) --scale=1 --unlogged-tables --fillfactor=98 --foreign-keys --quiet --tablespace=regress_pgbench_tap_1_ts --index-tablespace=regress_pgbench_tap_1_ts --partitions=2 --partition-method=hash', 0, [qr{^$}i], [ @@ -130,7 +130,7 @@ pgbench( # Test interaction of --init-steps with legacy step-selection options pgbench( - '--initialize --init-steps=dtpvgvv --no-vacuum --foreign-keys --unlogged-tables --partitions=3', + '--initialize --init-steps=dtpv(G)vv --no-vacuum --foreign-keys --unlogged-tables --partitions=3', 0, [qr{^$}], [ @@ -138,7 +138,7 @@ pgbench( qr{creating tables}, qr{creating 3 partitions}, qr{creating primary keys}, - qr{.* of .* tuples \(.*\) done}, + qr{generating data server side}, qr{creating foreign keys}, qr{(?!vacuuming)}, # no vacuum qr{done in \d+\.\d\d s } diff --git a/src/bin/pgbench/t/002_pgbench_no_server.pl b/src/bin/pgbench/t/002_pgbench_no_server.pl index 1e9542af3f..39badeb3aa 100644 --- a/src/bin/pgbench/t/002_pgbench_no_server.pl +++ b/src/bin/pgbench/t/002_pgbench_no_server.pl @@ -147,7 +147,22 @@ my @options = ( [ 'invalid init step', '-i -I dta', - [ qr{unrecognized initialization step}, qr{allowed steps are} ] + [ qr{unrecognized initialization step}, qr{Allowed step characters are} ] + ], + [ + 'invalid init step begin/begin', + '-i -I ((', + [ qr{nested transactions are not supported} ] + ], + [ + 'invalid init step end', + '-i -I )', + [ qr{does not have a matching BEGIN} ] + ], + [ + 'invalid init step begin', + '-i -I (dt', + [ qr{does not have a matching COMMIT} ] ], [ 'bad random seed',