Here is a rebase, plus some more changes:

I have improved the error message to tell from where the value was provided.

I have removed the test on the exact values produced by the expression test run.

I have added a test which runs with the same seed value several times
and checks that the output values are the same.

This version adds a :random_seed script variable, for information.

--
Fabien.
diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml
index 3dd492c..aa1a9e0 100644
--- a/doc/src/sgml/ref/pgbench.sgml
+++ b/doc/src/sgml/ref/pgbench.sgml
@@ -680,6 +680,45 @@ pgbench <optional> <replaceable>options</replaceable> </optional> <replaceable>d
      </varlistentry>
 
      <varlistentry>
+      <term><option>--random-seed=</option><replaceable>SEED</replaceable></term>
+      <listitem>
+       <para>
+        Set random generator seed.  Seeds the system random number generator,
+        which then produces a sequence of initial generator states, one for
+        each thread.
+        Values for <replaceable>SEED</replaceable> may be:
+        <literal>time</literal> (the default, the seed is based on the current time),
+        <literal>rand</literal> (use a strong random source, failing if none
+        is available), or any unsigned octal (<literal>012470</literal>),
+        decimal (<literal>5432</literal>) or
+        hexadecimal (<literal>0x1538</literal>) integer value.
+        The random generator is invoked explicitly from a pgbench script
+        (<literal>random...</literal> functions) or implicitly (for instance option
+        <option>--rate</option> uses it to schedule transactions).
+        When explicitly set, the value used for seeding is shown on the terminal.
+        Any value allowed for <replaceable>SEED</replaceable> may also be
+        provided through the environment variable
+        <literal>PGBENCH_RANDOM_SEED</literal>.
+        To ensure that the provided seed impacts all possible uses, put this option
+        first or use the environment variable.
+      </para>
+      <para>
+        Setting the seed explicitly allows reproducing a <command>pgbench</command>
+        run exactly, as far as random numbers are concerned.
+        As the random state is managed per thread, this means the exact same
+        <command>pgbench</command> run for an identical invocation if there is one
+        client per thread and there are no external or data dependencies.
+        From a statistical viewpoint reproducing runs exactly is a bad idea because
+        it can hide the performance variability or improve performance unduly,
+        e.g. by hitting the same pages as a previous run.
+        However, it may also be of great help for debugging, for instance
+        re-running a tricky case which leads to an error.
+        Use wisely.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
       <term><option>--sampling-rate=<replaceable>rate</replaceable></option></term>
       <listitem>
        <para>
@@ -874,14 +913,19 @@ pgbench <optional> <replaceable>options</replaceable> </optional> <replaceable>d
 
      <tbody>
       <row>
-       <entry> <literal>scale</literal> </entry>
-       <entry>current scale factor</entry>
-      </row>
-
-      <row>
        <entry> <literal>client_id</literal> </entry>
        <entry>unique number identifying the client session (starts from zero)</entry>
       </row>
+
+      <row>
+       <entry> <literal>random_seed</literal> </entry>
+       <entry>random generator seed</entry>
+      </row>
+
+      <row>
+       <entry> <literal>scale</literal> </entry>
+       <entry>current scale factor</entry>
+      </row>
      </tbody>
     </tgroup>
    </table>
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index 31ea6ca..c49a48a 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -146,6 +146,9 @@ int64		latency_limit = 0;
 char	   *tablespace = NULL;
 char	   *index_tablespace = NULL;
 
+/* random seed used when calling srandom() */
+int64 random_seed = -1;
+
 /*
  * end of configurable parameters
  *********************************************************************/
@@ -560,6 +563,7 @@ usage(void)
 		   "  --log-prefix=PREFIX      prefix for transaction time log file\n"
 		   "                           (default: \"pgbench_log\")\n"
 		   "  --progress-timestamp     use Unix epoch timestamps for progress\n"
+		   "  --random-seed=SEED       set random seed (\"time\", \"rand\", integer)\n"
 		   "  --sampling-rate=NUM      fraction of transactions to log (e.g., 0.01 for 1%%)\n"
 		   "\nCommon options:\n"
 		   "  -d, --debug              print debugging output\n"
@@ -4362,6 +4366,57 @@ printResults(TState *threads, StatsData *total, instr_time total_time,
 	}
 }
 
+/* Seed srandom() from "seed" (NULL behaves like "time"); "origin" is only used in the parse error message. */
+static void
+set_random_seed(const char *seed, const char *origin)
+{
+	unsigned int iseed;
+
+	if (seed == NULL || strcmp(seed, "time") == 0)
+	{
+		/* default: current time in microseconds, truncated to unsigned int */
+		instr_time	now;
+		INSTR_TIME_SET_CURRENT(now);
+		iseed = (unsigned int) INSTR_TIME_GET_MICROSEC(now);
+	}
+	else if (strcmp(seed, "rand") == 0)
+	{
+		/* fill the seed from pg_strong_random(), exiting if it fails */
+		if (!pg_strong_random(&iseed, sizeof(iseed)))
+		{
+			fprintf(stderr, "cannot seed random from a strong source\n");
+			exit(1);
+		}
+	}
+	else
+	{
+		/* parse an unsigned integer; "garbage" catches trailing characters, so sscanf must return exactly 1 */
+		char garbage;
+		if (!
+			/* hexadecimal: "0x" or "0X" prefix */
+			((seed[0] == '0' && (seed[1] == 'x' || seed[1] == 'X') &&
+			 sscanf(seed, "%x%c", &iseed, &garbage) == 1) ||
+			 /* octal: leading "0" not followed by "x" or "X" */
+			(seed[0] == '0' && seed[1] != 'x' && seed[1] != 'X' &&
+			 sscanf(seed, "%o%c", &iseed, &garbage) == 1) ||
+			 /* decimal: anything that does not start with "0" */
+			 (seed[0] != '0' &&
+			  sscanf(seed, "%u%c", &iseed, &garbage) == 1)))
+		{
+			fprintf(stderr,
+					"error while scanning '%s' from %s, expecting an unsigned integer, 'time' or 'rand'\n",
+					seed, origin);
+			exit(1);
+		}
+	}
+
+	if (seed != NULL)	/* report the seed only when a seed string was given */
+		fprintf(stderr, "setting random seed to %u\n", iseed);
+	srandom(iseed);
+	/* no precision loss: 32 bit unsigned int cast to 64 bit int */
+	random_seed = iseed;
+}
+
 
 int
 main(int argc, char **argv)
@@ -4404,6 +4459,7 @@ main(int argc, char **argv)
 		{"progress-timestamp", no_argument, NULL, 6},
 		{"log-prefix", required_argument, NULL, 7},
 		{"foreign-keys", no_argument, NULL, 8},
+		{"random-seed", required_argument, NULL, 9},
 		{NULL, 0, NULL, 0}
 	};
 
@@ -4472,6 +4528,9 @@ main(int argc, char **argv)
 	state = (CState *) pg_malloc(sizeof(CState));
 	memset(state, 0, sizeof(CState));
 
+	/* set random seed early, because it may be used while parsing scripts. */
+	set_random_seed(getenv("PGBENCH_RANDOM_SEED"), "PGBENCH_RANDOM_SEED environment variable");
+
 	while ((c = getopt_long(argc, argv, "iI:h:nvp:dqb:SNc:j:Crs:t:T:U:lf:D:F:M:P:R:L:", long_options, &optindex)) != -1)
 	{
 		char	   *script;
@@ -4744,6 +4803,10 @@ main(int argc, char **argv)
 				initialization_option_set = true;
 				foreign_keys = true;
 				break;
+			case 9:				/* random-seed */
+				benchmarking_option_set = true;
+				set_random_seed(optarg, "--random-seed option");
+				break;
 			default:
 				fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
 				exit(1);
@@ -5033,6 +5096,16 @@ main(int argc, char **argv)
 		}
 	}
 
+	/* idem for :random_seed */
+	if (lookupVariable(&state[0], "random_seed") == NULL)
+	{
+		for (i = 0; i < nclients; i++)
+		{
+			if (!putVariableInt(&state[i], "startup", "random_seed", random_seed))
+				exit(1);
+		}
+	}
+
 	if (!is_no_vacuum)
 	{
 		fprintf(stderr, "starting vacuum...");
@@ -5050,10 +5123,6 @@ main(int argc, char **argv)
 	}
 	PQfinish(con);
 
-	/* set random seed */
-	INSTR_TIME_SET_CURRENT(start_time);
-	srandom((unsigned int) INSTR_TIME_GET_MICROSEC(start_time));
-
 	/* set up thread data structures */
 	threads = (TState *) pg_malloc(sizeof(TState) * nthreads);
 	nclients_dealt = 0;
diff --git a/src/bin/pgbench/t/001_pgbench_with_server.pl b/src/bin/pgbench/t/001_pgbench_with_server.pl
index a8b2962..de99e4e 100644
--- a/src/bin/pgbench/t/001_pgbench_with_server.pl
+++ b/src/bin/pgbench/t/001_pgbench_with_server.pl
@@ -210,14 +210,18 @@ COMMIT;
 } });
 
 # test expressions
+# commands 1..3 and 20 depend on the random seed, which is used to call srandom.
 pgbench(
-	'-t 1 -Dfoo=-10.1 -Dbla=false -Di=+3 -Dminint=-9223372036854775808 -Dn=null -Dt=t -Df=of -Dd=1.0',
+	'--random-seed=0x1538 -t 1 -Dfoo=-10.1 -Dbla=false -Di=+3 -Dminint=-9223372036854775808 -Dn=null -Dt=t -Df=of -Dd=1.0',
 	0,
 	[ qr{type: .*/001_pgbench_expressions}, qr{processed: 1/1} ],
-	[   qr{command=1.: int 1\d\b},
-	    qr{command=2.: int 1\d\d\b},
-	    qr{command=3.: int 1\d\d\d\b},
-	    qr{command=4.: int 4\b},
+	[   qr{setting random seed to 5432\b},
+		# After explicit seeding, the four * random checks (1-3,20) should be
+		# deterministic, but not necessarily portable.
+		qr{command=1.: int 1\d\b}, # uniform random: 12 on linux
+		qr{command=2.: int 1\d\d\b}, # exponential random: 106 on linux
+		qr{command=3.: int 1\d\d\d\b}, # gaussian random: 1462 on linux
+		qr{command=4.: int 4\b},
 		qr{command=5.: int 5\b},
 		qr{command=6.: int 6\b},
 		qr{command=7.: int 7\b},
@@ -230,7 +234,7 @@ pgbench(
 		qr{command=16.: double 16\b},
 		qr{command=17.: double 17\b},
 		qr{command=18.: int 9223372036854775807\b},
-		qr{command=20.: int [1-9]\b},
+		qr{command=20.: int \d\b}, # zipfian random: 1 on linux
 		qr{command=21.: double -27\b},
 		qr{command=22.: double 1024\b},
 		qr{command=23.: double 1\b},
@@ -259,6 +263,9 @@ pgbench(
 		qr{command=46.: int 46\b},
 		qr{command=47.: boolean true\b},
 		qr{command=48.: boolean true\b},
+		qr{command=53.: int 1\b},    # :scale
+		qr{command=54.: int 0\b},    # :client_id
+		qr{command=55.: int 5432\b}, # :random_seed
 	],
 	'pgbench expressions',
 	{   '001_pgbench_expressions' => q{-- integer functions
@@ -332,6 +339,10 @@ pgbench(
 \set yz debug(case when :zy = 0 then -1 else (1 / :zy) end)
 \set yz debug(case when :zy = 0 or (1 / :zy) < 0 then -1 else (1 / :zy) end)
 \set yz debug(case when :zy > 0 and (1 / :zy) < 0 then (1 / :zy) else 1 end)
+-- check automatic variables
+\set sc debug(:scale)
+\set ci debug(:client_id)
+\set rs debug(:random_seed)
 -- substitute variables of all possible types
 \set v0 NULL
 \set v1 TRUE
@@ -340,6 +351,45 @@ pgbench(
 SELECT :v0, :v1, :v2, :v3;
 } });
 
+# random determinism when seeded
+$node->safe_psql('postgres',
+	'CREATE UNLOGGED TABLE seeded_random(rand TEXT NOT NULL, val INTEGER NOT NULL);');
+
+# same value in decimal, octal and hexadecimal
+for my $seed ('5432', '012470', '0x1538', '0X1538')
+{
+    pgbench("--random-seed=$seed -t 1",
+	0,
+	[qr{processed: 1/1}],
+	[qr{setting random seed to 5432\b}],
+	"random seeded with $seed",
+	{ "001_pgbench_random_seed_$seed" => q{-- test random functions
+\set ur random(1000, 1999)
+\set er random_exponential(2000, 2999, 2.0)
+\set gr random_gaussian(3000, 3999, 3.0)
+\set zr random_zipfian(4000, 4999, 2.5)
+INSERT INTO seeded_random(rand, val) VALUES
+  ('uniform', :ur),
+  ('exponential', :er),
+  ('gaussian', :gr),
+  ('zipfian', :zr);
+} });
+}
+
+# check that all runs generated the same 4 values
+my ($ret, $out, $err) =
+  $node->psql('postgres',
+	'SELECT rand, val, COUNT(*) FROM seeded_random GROUP BY rand, val');
+
+ok($ret == 0, "psql seeded_random count ok");
+ok($err eq '', "psql seeded_random count stderr is empty");
+ok($out =~ /uniform\|1\d\d\d\|4/, "psql seeded_random count uniform");
+ok($out =~ /exponential\|2\d\d\d\|4/, "psql seeded_random count exponential");
+ok($out =~ /gaussian\|3\d\d\d\|4/, "psql seeded_random count gaussian");
+ok($out =~ /zipfian\|4\d\d\d\|4/, "psql seeded_random count zipfian");
+
+$node->safe_psql('postgres', 'DROP TABLE seeded_random;');
+
 =head
 
 } });
diff --git a/src/bin/pgbench/t/002_pgbench_no_server.pl b/src/bin/pgbench/t/002_pgbench_no_server.pl
index 6ea55f8..c015f36 100644
--- a/src/bin/pgbench/t/002_pgbench_no_server.pl
+++ b/src/bin/pgbench/t/002_pgbench_no_server.pl
@@ -78,6 +78,8 @@ my @options = (
 	[ 'invalid init step', '-i -I dta',
 		[qr{unrecognized initialization step},
 		 qr{allowed steps are} ] ],
+	[ 'bad random seed', '--random-seed=one',
+		[qr{error while scanning 'one' from --random-seed option, expecting an unsigned integer} ] ],
 
 	# loging sub-options
 	[   'sampling => log', '--sampling-rate=0.01',

Reply via email to