Hello Tom,
Here is a POC which defines an internal interface for a PRNG, and use it
within pgbench, with several possible implementations which default to
rand48.
I seriously dislike this patch. pgbench's random support is quite
overengineered already IMO, and this proposes to add a whole batch of
new code and new APIs to fix a very small bug.
My intention is rather to discuss postgres' PRNG, in passing. Full success
on this point:-)
Our immediate problem is to fix a portability failure, which we need to
back-patch into at least one released branch, ergo conservatism is
warranted.
Sure, the patch I sent is definitely not for backpatching, it is for
discussion.
I had in mind something more like the attached.
Yep.
I'm not too happy that it mixes API levels, and about the int/double/int
path.
Attached an updated version which relies on pg_jrand48 instead. Also, as
the pseudo-random state is fully controlled, seeded test results are
deterministic so the expected value can be fully checked.
I did a few sanity tests which were all ok.
I think that this version is appropriate for backpatching. I also think
that it would be appropriate to consider having a better PRNG to replace
rand48 in a future release.
--
Fabien.
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index b5c75ce1c6..27aac479e3 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -185,7 +185,7 @@ int64 latency_limit = 0;
char *tablespace = NULL;
char *index_tablespace = NULL;
-/* random seed used when calling srandom() */
+/* random seed used to initialize base_random_sequence */
int64 random_seed = -1;
/*
@@ -287,6 +287,9 @@ typedef struct RandomState
unsigned short xseed[3];
} RandomState;
+/* Various random sequences are initialized from this one. */
+static RandomState base_random_sequence;
+
/*
* Connection state machine states.
*/
@@ -598,6 +601,8 @@ static const BuiltinScript builtin_script[] =
/* Function prototypes */
+static void initRandomState(RandomState *random_state);
+static int64 getrand(RandomState *random_state, int64 min, int64 max);
static void setNullValue(PgBenchValue *pv);
static void setBoolValue(PgBenchValue *pv, bool bval);
static void setIntValue(PgBenchValue *pv, int64 ival);
@@ -833,16 +838,28 @@ strtodouble(const char *str, bool errorOK, double *dv)
/*
* Initialize a random state struct.
+ *
+ * We derive the seed from base_random_sequence, which must be set up already.
*/
static void
initRandomState(RandomState *random_state)
{
- random_state->xseed[0] = random();
- random_state->xseed[1] = random();
- random_state->xseed[2] = random();
+ random_state->xseed[0] = (unsigned short)
+ pg_jrand48(base_random_sequence.xseed) & 0xFFFF;
+ random_state->xseed[1] = (unsigned short)
+ pg_jrand48(base_random_sequence.xseed) & 0xFFFF;
+ random_state->xseed[2] = (unsigned short)
+ pg_jrand48(base_random_sequence.xseed) & 0xFFFF;
}
-/* random number generator: uniform distribution from min to max inclusive */
+/*
+ * Random number generator: uniform distribution from min to max inclusive.
+ *
+ * Although the limits are expressed as int64, you can't generate the full
+ * int64 range in one call, because the difference of the limits mustn't
+ * overflow int64. In practice it's unwise to ask for more than an int32
+ * range, because of the limited precision of pg_erand48().
+ */
static int64
getrand(RandomState *random_state, int64 min, int64 max)
{
@@ -5126,12 +5143,14 @@ printResults(TState *threads, StatsData *total, instr_time total_time,
}
}
-/* call srandom based on some seed. NULL triggers the default behavior. */
+/*
+ * Set up a random seed according to seed parameter (NULL means default),
+ * and initialize base_random_sequence for use in initializing other sequences.
+ */
static bool
set_random_seed(const char *seed)
{
- /* srandom expects an unsigned int */
- unsigned int iseed;
+ uint64 iseed;
if (seed == NULL || strcmp(seed, "time") == 0)
{
@@ -5139,7 +5158,7 @@ set_random_seed(const char *seed)
instr_time now;
INSTR_TIME_SET_CURRENT(now);
- iseed = (unsigned int) INSTR_TIME_GET_MICROSEC(now);
+ iseed = (uint64) INSTR_TIME_GET_MICROSEC(now);
}
else if (strcmp(seed, "rand") == 0)
{
@@ -5155,7 +5174,7 @@ set_random_seed(const char *seed)
/* parse seed unsigned int value */
char garbage;
- if (sscanf(seed, "%u%c", &iseed, &garbage) != 1)
+ if (sscanf(seed, UINT64_FORMAT "%c", &iseed, &garbage) != 1)
{
fprintf(stderr,
"unrecognized random seed option \"%s\": expecting an unsigned integer, \"time\" or \"rand\"\n",
@@ -5165,10 +5184,14 @@ set_random_seed(const char *seed)
}
if (seed != NULL)
- fprintf(stderr, "setting random seed to %u\n", iseed);
- srandom(iseed);
- /* no precision loss: 32 bit unsigned int cast to 64 bit int */
+ fprintf(stderr, "setting random seed to " UINT64_FORMAT "\n", iseed);
random_seed = iseed;
+
+ /* Fill base_random_sequence with low-order bits of seed */
+ base_random_sequence.xseed[0] = iseed & 0xFFFF;
+ base_random_sequence.xseed[1] = (iseed >> 16) & 0xFFFF;
+ base_random_sequence.xseed[2] = (iseed >> 32) & 0xFFFF;
+
return true;
}
@@ -5862,10 +5885,9 @@ main(int argc, char **argv)
/* set default seed for hash functions */
if (lookupVariable(&state[0], "default_seed") == NULL)
{
- uint64 seed = ((uint64) (random() & 0xFFFF) << 48) |
- ((uint64) (random() & 0xFFFF) << 32) |
- ((uint64) (random() & 0xFFFF) << 16) |
- (uint64) (random() & 0xFFFF);
+ uint64 seed =
+ ((uint64) pg_jrand48(base_random_sequence.xseed) & 0xFFFFFFFF) |
+ (((uint64) pg_jrand48(base_random_sequence.xseed) & 0xFFFFFFFF) << 32);
for (i = 0; i < nclients; i++)
if (!putVariableInt(&state[i], "startup", "default_seed", (int64) seed))
diff --git a/src/bin/pgbench/t/001_pgbench_with_server.pl b/src/bin/pgbench/t/001_pgbench_with_server.pl
index c87748086a..4aaac76c03 100644
--- a/src/bin/pgbench/t/001_pgbench_with_server.pl
+++ b/src/bin/pgbench/t/001_pgbench_with_server.pl
@@ -259,11 +259,11 @@ pgbench(
[
qr{setting random seed to 5432\b},
- # After explicit seeding, the four * random checks (1-3,20) should be
- # deterministic, but not necessarily portable.
- qr{command=1.: int 1\d\b}, # uniform random: 12 on linux
- qr{command=2.: int 1\d\d\b}, # exponential random: 106 on linux
- qr{command=3.: int 1\d\d\d\b}, # gaussian random: 1462 on linux
+ # After explicit seeding, the four random checks (1-3,20) are
+ # deterministic
+ qr{command=1.: int 13\b}, # uniform random
+ qr{command=2.: int 116\b}, # exponential random
+ qr{command=3.: int 1498\b}, # gaussian random
qr{command=4.: int 4\b},
qr{command=5.: int 5\b},
qr{command=6.: int 6\b},
@@ -276,7 +276,7 @@ pgbench(
qr{command=15.: double 15\b},
qr{command=16.: double 16\b},
qr{command=17.: double 17\b},
- qr{command=20.: int \d\b}, # zipfian random: 1 on linux
+ qr{command=20.: int 1\b}, # zipfian random
qr{command=21.: double -27\b},
qr{command=22.: double 1024\b},
qr{command=23.: double 1\b},