But now the documentation is back to its original state of silence on
what base or how many bases might be allowed. Could it just say
"or an unsigned decimal integer value"? Then no one will wonder.
Done in the attached.
Thanks for the reviews.
--
Fabien.
diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml
index 5f28023..86a91ba 100644
--- a/doc/src/sgml/ref/pgbench.sgml
+++ b/doc/src/sgml/ref/pgbench.sgml
@@ -680,6 +680,43 @@ pgbench <optional> <replaceable>options</replaceable>
</optional> <replaceable>d
</varlistentry>
<varlistentry>
+
<term><option>--random-seed=</option><replaceable>SEED</replaceable></term>
+ <listitem>
+ <para>
+ Set random generator seed. Seeds the system random number generator,
+ which then produces a sequence of initial generator states, one for
+ each thread.
+ Values for <replaceable>SEED</replaceable> may be:
+ <literal>time</literal> (the default, the seed is based on the current
time),
+ <literal>rand</literal> (use a strong random source, failing if none
+ is available), or an unsigned decimal integer value.
+ The random generator is invoked explicitly from a pgbench script
+ (<literal>random...</literal> functions) or implicitly (for instance
option
+ <option>--rate</option> uses it to schedule transactions).
+ When explicitly set, the value used for seeding is shown on the
terminal.
+ Any value allowed for <replaceable>SEED</replaceable> may also be
+ provided through the environment variable
+ <literal>PGBENCH_RANDOM_SEED</literal>.
+ To ensure that the provided seed impacts all possible uses, put this
option
+ first or use the environment variable.
+ </para>
+ <para>
+ Setting the seed explicitly allows reproducing a
<command>pgbench</command>
+ run exactly, as far as random numbers are concerned.
+ As the random state is managed per thread, this means the exact same
+ <command>pgbench</command> run for an identical invocation if there is
one
+ client per thread and there are no external or data dependencies.
+ From a statistical viewpoint, reproducing runs exactly is a bad idea
because
+ it can hide the performance variability or improve performance unduly,
+ e.g. by hitting the same pages as a previous run.
+ However, it may also be of great help for debugging, for instance
+ re-running a tricky case which leads to an error.
+ Use wisely.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
<term><option>--sampling-rate=<replaceable>rate</replaceable></option></term>
<listitem>
<para>
@@ -874,14 +911,19 @@ pgbench <optional> <replaceable>options</replaceable>
</optional> <replaceable>d
<tbody>
<row>
- <entry> <literal>scale</literal> </entry>
- <entry>current scale factor</entry>
- </row>
-
- <row>
<entry> <literal>client_id</literal> </entry>
<entry>unique number identifying the client session (starts from
zero)</entry>
</row>
+
+ <row>
+ <entry> <literal>random_seed</literal> </entry>
+ <entry>random generator seed (unless overwritten with
<option>-D</option>)</entry>
+ </row>
+
+ <row>
+ <entry> <literal>scale</literal> </entry>
+ <entry>current scale factor</entry>
+ </row>
</tbody>
</tgroup>
</table>
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index 29d69de..a4c6c7b 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -146,6 +146,9 @@ int64 latency_limit = 0;
char *tablespace = NULL;
char *index_tablespace = NULL;
+/* random seed used when calling srandom() */
+int64 random_seed = -1;
+
/*
* end of configurable parameters
*********************************************************************/
@@ -561,6 +564,7 @@ usage(void)
" --log-prefix=PREFIX prefix for transaction time log
file\n"
" (default: \"pgbench_log\")\n"
" --progress-timestamp use Unix epoch timestamps for
progress\n"
+ " --random-seed=SEED set random seed (\"time\",
\"rand\", integer)\n"
" --sampling-rate=NUM fraction of transactions to log
(e.g., 0.01 for 1%%)\n"
"\nCommon options:\n"
" -d, --debug print debugging output\n"
@@ -4353,6 +4357,49 @@ printResults(TState *threads, StatsData *total,
instr_time total_time,
}
}
+/* call srandom based on some seed. NULL triggers the default behavior. */
+static void
+set_random_seed(const char *seed, const char *origin)
+{
+ /* srandom expects an unsigned int */
+ unsigned int iseed;
+
+ if (seed == NULL || strcmp(seed, "time") == 0)
+ {
+ /* rely on current time */
+ instr_time now;
+ INSTR_TIME_SET_CURRENT(now);
+ iseed = (unsigned int) INSTR_TIME_GET_MICROSEC(now);
+ }
+ else if (strcmp(seed, "rand") == 0)
+ {
+ /* use some "strong" random source */
+ if (!pg_strong_random(&iseed, sizeof(iseed)))
+ {
+ fprintf(stderr, "cannot seed random from a strong
source\n");
+ exit(1);
+ }
+ }
+ else
+ {
+ /* parse seed unsigned int value */
+ char garbage;
+ if (sscanf(seed, "%u%c", &iseed, &garbage) != 1)
+ {
+ fprintf(stderr,
+ "error while scanning '%s' from %s,
expecting an unsigned integer, 'time' or 'rand'\n",
+ seed, origin);
+ exit(1);
+ }
+ }
+
+ if (seed != NULL)
+ fprintf(stderr, "setting random seed to %u\n", iseed);
+ srandom(iseed);
+ /* no precision loss: 32 bit unsigned int cast to 64 bit int */
+ random_seed = iseed;
+}
+
int
main(int argc, char **argv)
@@ -4395,6 +4442,7 @@ main(int argc, char **argv)
{"progress-timestamp", no_argument, NULL, 6},
{"log-prefix", required_argument, NULL, 7},
{"foreign-keys", no_argument, NULL, 8},
+ {"random-seed", required_argument, NULL, 9},
{NULL, 0, NULL, 0}
};
@@ -4463,6 +4511,9 @@ main(int argc, char **argv)
state = (CState *) pg_malloc(sizeof(CState));
memset(state, 0, sizeof(CState));
+ /* set random seed early, because it may be used while parsing scripts.
*/
+ set_random_seed(getenv("PGBENCH_RANDOM_SEED"), "PGBENCH_RANDOM_SEED
environment variable");
+
while ((c = getopt_long(argc, argv,
"iI:h:nvp:dqb:SNc:j:Crs:t:T:U:lf:D:F:M:P:R:L:", long_options, &optindex)) != -1)
{
char *script;
@@ -4735,6 +4786,10 @@ main(int argc, char **argv)
initialization_option_set = true;
foreign_keys = true;
break;
+ case 9: /* random-seed */
+ benchmarking_option_set = true;
+ set_random_seed(optarg, "--random-seed option");
+ break;
default:
fprintf(stderr, _("Try \"%s --help\" for more
information.\n"), progname);
exit(1);
@@ -5024,6 +5079,16 @@ main(int argc, char **argv)
}
}
+ /* idem for :random_seed */
+ if (lookupVariable(&state[0], "random_seed") == NULL)
+ {
+ for (i = 0; i < nclients; i++)
+ {
+ if (!putVariableInt(&state[i], "startup",
"random_seed", random_seed))
+ exit(1);
+ }
+ }
+
if (!is_no_vacuum)
{
fprintf(stderr, "starting vacuum...");
@@ -5041,10 +5106,6 @@ main(int argc, char **argv)
}
PQfinish(con);
- /* set random seed */
- INSTR_TIME_SET_CURRENT(start_time);
- srandom((unsigned int) INSTR_TIME_GET_MICROSEC(start_time));
-
/* set up thread data structures */
threads = (TState *) pg_malloc(sizeof(TState) * nthreads);
nclients_dealt = 0;
diff --git a/src/bin/pgbench/t/001_pgbench_with_server.pl
b/src/bin/pgbench/t/001_pgbench_with_server.pl
index 0c23d2f..2a69dfb 100644
--- a/src/bin/pgbench/t/001_pgbench_with_server.pl
+++ b/src/bin/pgbench/t/001_pgbench_with_server.pl
@@ -29,6 +29,12 @@ sub pgbench
$filename =~ s/\@\d+$//;
#push @filenames, $filename;
+ # filenames are expected to be unique within a test
+ if (-e $filename)
+ {
+ ok(0, "$filename must not already exists");
+ unlink $filename or die "cannot unlink
$filename: $!";
+ }
append_to_file($filename, $$files{$fn});
}
}
@@ -210,14 +216,18 @@ COMMIT;
} });
# test expressions
+# commands 1..3 and 20 depend on the random seed, which is used to call srandom.
pgbench(
- '-t 1 -Dfoo=-10.1 -Dbla=false -Di=+3 -Dminint=-9223372036854775808
-Dn=null -Dt=t -Df=of -Dd=1.0',
+ '--random-seed=5432 -t 1 -Dfoo=-10.1 -Dbla=false -Di=+3
-Dminint=-9223372036854775808 -Dn=null -Dt=t -Df=of -Dd=1.0',
0,
[ qr{type: .*/001_pgbench_expressions}, qr{processed: 1/1} ],
- [ qr{command=1.: int 1\d\b},
- qr{command=2.: int 1\d\d\b},
- qr{command=3.: int 1\d\d\d\b},
- qr{command=4.: int 4\b},
+ [ qr{setting random seed to 5432\b},
+ # After explicit seeding, the four * random checks (1-3,20)
should be
+ # deterministic, but not necessarily portable.
+ qr{command=1.: int 1\d\b}, # uniform random: 12 on linux
+ qr{command=2.: int 1\d\d\b}, # exponential random: 106 on linux
+ qr{command=3.: int 1\d\d\d\b}, # gaussian random: 1462 on linux
+ qr{command=4.: int 4\b},
qr{command=5.: int 5\b},
qr{command=6.: int 6\b},
qr{command=7.: int 7\b},
@@ -230,7 +240,7 @@ pgbench(
qr{command=16.: double 16\b},
qr{command=17.: double 17\b},
qr{command=18.: int 9223372036854775807\b},
- qr{command=20.: int [1-9]\b},
+ qr{command=20.: int \d\b}, # zipfian random: 1 on linux
qr{command=21.: double -27\b},
qr{command=22.: double 1024\b},
qr{command=23.: double 1\b},
@@ -259,6 +269,9 @@ pgbench(
qr{command=46.: int 46\b},
qr{command=47.: boolean true\b},
qr{command=48.: boolean true\b},
+ qr{command=53.: int 1\b}, # :scale
+ qr{command=54.: int 0\b}, # :client_id
+ qr{command=55.: int 5432\b}, # :random_seed
],
'pgbench expressions',
{ '001_pgbench_expressions' => q{-- integer functions
@@ -332,6 +345,10 @@ pgbench(
\set yz debug(case when :zy = 0 then -1 else (1 / :zy) end)
\set yz debug(case when :zy = 0 or (1 / :zy) < 0 then -1 else (1 / :zy) end)
\set yz debug(case when :zy > 0 and (1 / :zy) < 0 then (1 / :zy) else 1 end)
+-- check automatic variables
+\set sc debug(:scale)
+\set ci debug(:client_id)
+\set rs debug(:random_seed)
-- substitute variables of all possible types
\set v0 NULL
\set v1 TRUE
@@ -340,6 +357,46 @@ pgbench(
SELECT :v0, :v1, :v2, :v3;
} });
+# random determinism when seeded
+$node->safe_psql('postgres',
+ 'CREATE UNLOGGED TABLE seeded_random(seed INT8 NOT NULL, rand TEXT NOT
NULL, val INTEGER NOT NULL);');
+
+# same value to check for determinism
+my $seed = int(rand(1000000000));
+for my $i (1, 2)
+{
+ pgbench("--random-seed=$seed -t 1",
+ 0,
+ [qr{processed: 1/1}],
+ [qr{setting random seed to $seed\b}],
+ "random seeded with $seed",
+ { "001_pgbench_random_seed_$i" => q{-- test random functions
+\set ur random(1000, 1999)
+\set er random_exponential(2000, 2999, 2.0)
+\set gr random_gaussian(3000, 3999, 3.0)
+\set zr random_zipfian(4000, 4999, 2.5)
+INSERT INTO seeded_random(seed, rand, val) VALUES
+ (:random_seed, 'uniform', :ur),
+ (:random_seed, 'exponential', :er),
+ (:random_seed, 'gaussian', :gr),
+ (:random_seed, 'zipfian', :zr);
+} });
+}
+
+# check that all runs generated the same 4 values
+my ($ret, $out, $err) =
+ $node->psql('postgres',
+ 'SELECT seed, rand, val, COUNT(*) FROM seeded_random GROUP BY seed,
rand, val');
+
+ok($ret == 0, "psql seeded_random count ok");
+ok($err eq '', "psql seeded_random count stderr is empty");
+ok($out =~ /\b$seed\|uniform\|1\d\d\d\|2/, "psql seeded_random count uniform");
+ok($out =~ /\b$seed\|exponential\|2\d\d\d\|2/, "psql seeded_random count
exponential");
+ok($out =~ /\b$seed\|gaussian\|3\d\d\d\|2/, "psql seeded_random count
gaussian");
+ok($out =~ /\b$seed\|zipfian\|4\d\d\d\|2/, "psql seeded_random count zipfian");
+
+$node->safe_psql('postgres', 'DROP TABLE seeded_random;');
+
# backslash commands
pgbench(
'-t 1', 0,
diff --git a/src/bin/pgbench/t/002_pgbench_no_server.pl
b/src/bin/pgbench/t/002_pgbench_no_server.pl
index 6ea55f8..c015f36 100644
--- a/src/bin/pgbench/t/002_pgbench_no_server.pl
+++ b/src/bin/pgbench/t/002_pgbench_no_server.pl
@@ -78,6 +78,8 @@ my @options = (
[ 'invalid init step', '-i -I dta',
[qr{unrecognized initialization step},
qr{allowed steps are} ] ],
+ [ 'bad random seed', '--random-seed=one',
+ [qr{error while scanning 'one' from --random-seed option,
expecting an unsigned integer} ] ],
# loging sub-options
[ 'sampling => log', '--sampling-rate=0.01',