Indeed, the patch needs a rebase & conflit resolution. I'll do it. Later.
Here is an update: - take advantage of pg_bitutils (although I noted that the "slow" popcount there could be speeded-up and shorten with a bitwise operator implementation that I just removed from pgbench). - add comments about the bijective transformations in the code.As already stated, this function makes sense for people who want to test performance with pgbench using non uniform rands. If you do not want to do that, you will probably find the function pretty useless. I can't help it.
Also, non uniform rands is also a way to test pg lock contention behavior. -- Fabien.
diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml index 9d18524834..14a1ae7b75 100644 --- a/doc/src/sgml/ref/pgbench.sgml +++ b/doc/src/sgml/ref/pgbench.sgml @@ -936,7 +936,7 @@ pgbench <optional> <replaceable>options</replaceable> </optional> <replaceable>d <row> <entry> <literal>default_seed</literal> </entry> - <entry>seed used in hash functions by default</entry> + <entry>seed used in hash and pseudo-random permutation functions by default</entry> </row> <row> @@ -1481,6 +1481,13 @@ SELECT 2 AS two, 3 AS three \gset p_ <entry><literal>pow(2.0, 10)</literal>, <literal>power(2.0, 10)</literal></entry> <entry><literal>1024.0</literal></entry> </row> + <row> + <entry><literal><function>pr_perm(<replaceable>i</replaceable>, <replaceable>size</replaceable> [, <replaceable>seed</replaceable> ] )</function></literal></entry> + <entry>integer</entry> + <entry>pseudo-random permutation in [0,size)</entry> + <entry><literal>pr_perm(0, 4)</literal></entry> + <entry><literal>0</literal>, <literal>1</literal>, <literal>2</literal> or <literal>3</literal></entry> + </row> <row> <entry><literal><function>random(<replaceable>lb</replaceable>, <replaceable>ub</replaceable>)</function></literal></entry> <entry>integer</entry> @@ -1651,6 +1658,24 @@ f(x) = PHI(2.0 * parameter * (x - mu) / (max - min + 1)) / </programlisting> </para> + <para> + Function <literal>pr_perm</literal> implements a pseudo-random permutation. + It allows to mix the output of non uniform random functions so that + values drawn more often are not trivially correlated. + It permutes integers in [0, size) using a seed by applying rounds of + simple invertible functions, similarly to an encryption function, + although beware that it is not at all cryptographically secure. + Compared to <literal>hash</literal> functions discussed above, the function + ensures that a perfect permutation is applied: there are no collisions + nor holes in the output values. + Values outside the interval are interpreted modulo the size. + The function errors if size is not positive. + If no seed is provided, <literal>:default_seed</literal> is used. + For a given size and seed, the function is fully deterministic: if two + permutations on the same size must not be correlated, use distinct seeds + as outlined in the previous example about hash functions. + </para> + <para> As an example, the full definition of the built-in TPC-B-like transaction is: diff --git a/src/bin/pgbench/exprparse.y b/src/bin/pgbench/exprparse.y index 17c9ec32c5..13410787d4 100644 --- a/src/bin/pgbench/exprparse.y +++ b/src/bin/pgbench/exprparse.y @@ -19,6 +19,7 @@ #define PGBENCH_NARGS_VARIABLE (-1) #define PGBENCH_NARGS_CASE (-2) #define PGBENCH_NARGS_HASH (-3) +#define PGBENCH_NARGS_PRPERM (-4) PgBenchExpr *expr_parse_result; @@ -370,6 +371,9 @@ static const struct { "hash_fnv1a", PGBENCH_NARGS_HASH, PGBENCH_HASH_FNV1A }, + { + "pr_perm", PGBENCH_NARGS_PRPERM, PGBENCH_PRPERM + }, /* keep as last array element */ { NULL, 0, 0 @@ -482,6 +486,19 @@ make_func(yyscan_t yyscanner, int fnumber, PgBenchExprList *args) } break; + /* pseudo-random permutation function with optional seed argument */ + case PGBENCH_NARGS_PRPERM: + if (len < 2 || len > 3) + expr_yyerror_more(yyscanner, "unexpected number of arguments", + PGBENCH_FUNCTIONS[fnumber].fname); + + if (len == 2) + { + PgBenchExpr *var = make_variable("default_seed"); + args = make_elist(var, args); + } + break; + /* common case: positive arguments number */ default: Assert(PGBENCH_FUNCTIONS[fnumber].nargs >= 0); diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c index 19532cfb54..bdbb9c252b 100644 --- a/src/bin/pgbench/pgbench.c +++ b/src/bin/pgbench/pgbench.c @@ -37,6 +37,7 @@ #include "getopt_long.h" #include "libpq-fe.h" #include "portability/instr_time.h" +#include "port/pg_bitutils.h" #include <ctype.h> #include <float.h> @@ -1157,6 +1158,301 @@ getHashMurmur2(int64 val, uint64 seed) return (int64) result; } +/* pseudo-random permutation */ + +/* 16 so that % 16 can be optimized to & 0x0f */ +#define PRP_PRIMES 16 +/* + * 24 bits mega primes from https://primes.utm.edu/lists/small/millions/ + * the i-th prime, i \in [0, 15], is the first prime above 2^23 + i * 2^19 + */ +static uint64 primes[PRP_PRIMES] = { + UINT64CONST(8388617), + UINT64CONST(8912921), + UINT64CONST(9437189), + UINT64CONST(9961487), + UINT64CONST(10485767), + UINT64CONST(11010059), + UINT64CONST(11534351), + UINT64CONST(12058679), + UINT64CONST(12582917), + UINT64CONST(13107229), + UINT64CONST(13631489), + UINT64CONST(14155777), + UINT64CONST(14680067), + UINT64CONST(15204391), + UINT64CONST(15728681), + UINT64CONST(16252967) +}; + +/* how many "encryption" rounds to apply */ +#define PRP_ROUNDS 4 + +/* return smallest mask holding n */ +static uint64 +compute_mask(uint64 n) +{ + n |= n >> 1; + n |= n >> 2; + n |= n >> 4; + n |= n >> 8; + n |= n >> 16; + n |= n >> 32; + return n; +} + +/* length of n binary representation */ +static int +nbits(uint64 n) +{ + /* set lower bits to 1 and count them */ + return pg_popcount64(compute_mask(n)); +} + +/* + * Compute (x * y) % m, where x and y in [0, 2^64), m in [1, 2^64). + * + * Use improved interleaved modular multiplication algorithm to avoid + * overflows when necessary. + */ +static uint64 +modular_multiply(uint64 x, uint64 y, const uint64 m) +{ + int i, + bits; + uint64 r; + + Assert(m >= 1); + + /* Because of (x * y) % m = (x % m * y % m) % m */ + if (x >= m) + x %= m; + if (y >= m) + y %= m; + + /* Return the trivial result. */ + if (x == 0 || y == 0 || m == 1) + return 0; + + /* Return the result if (x * y) can be multiplied without overflow. */ + if (nbits(x) + nbits(y) <= 64) + return (x * y) % m; + + /* To reduce the for loop in the algorithm below, ensure y <= x. */ + if (x < y) + { + uint64 tmp = x; + + x = y; + y = tmp; + } + + /*----- + * Interleaved modular multiplication algorithm from: + * + * D.N. Amanor et al., "Efficient hardware architecture for modular + * multiplication on FPGAs", in International Conference on Field + * Programmable Logic and Applications, Aug 2005, pp. 539-542. + * + * This algorithm is usually used in the field of digital circuit design. + * + * Input: X, Y, M; 0 <= X, Y <= M; + * Output: R = X * Y mod M; + * bits: number of bits of Y + * Y[i]: i th bit of Y + * + * 1. R = 0; + * 2. for (i = bits - 1; i >= 0; i--) { + * 3. R = 2 * R; + * 4. if (Y[i] == 0x1) + * 5. R += X; + * 6. if (R >= M) R -= M; + * 7. if (R >= M) R -= M; + * } + * + * In Steps 3 and 5, overflow should be avoided. The details are explained + * in each step. + * Steps 6 and 7 can be instead a modular operation (R %= M). + * + * Note that, in this implementation, the distributive property of modular + * operation shown in below is used whenever it can be applied. + * (X * Y) % M = X % M * Y % M, (X + Y) % M = X % M + Y % M + * + * For ease of understanding, an example is shown. + * Example: (11 * 5) % 7 + * + * [Notation] ":=" means substitution. + * + * [initial value] + * R := 0x0000 + * + * [i=2] + * R := (0x0000 * 2) % 0x0111 = 0x0000 + * R := (R + 0x1011) % 0x0111 = 0x0100 + * + * [i=1] + * R := (0x0100 * 2) % 0x0111 = 0x1000 % 0x0111 = 0x0001 + * + * [i=0] + * R := (0x0001 * 2) % 0x0111 = 0x0010 + * R := (R + 0x1011) % 0x0111 = 0x1101 % 0x0111 = 0x0110 + * + * [result] + * R = 6 + */ + + bits = nbits(y); + r = 0; + + for (i = bits - 1; i >= 0; i--) + { + if (r > UINT64CONST(0x7fffffffffffffff)) + /*----- + * To avoid overflow, transform from (2 * r) to (2 * r) % m, and + * further transform to mathematically equivalent form below: + * + * For ease of understanding, an example using one digit decimal number, + * where r = 6 and m = 7, is shown. + * (6 * 2) % 7 = (7 - 1) * 2 % 7 = (7 % 7 - 1 % 7) * 2 % 7 + * = (0 - 1) * 2 % 7 = -2 % 7 = 7 - 2 = 5. + * Generally, if (r * 2) overflows, + * (r * 2) % m = m - (m - r) * 2. + */ + r = m - ((m - r) << 1); + else + r <<= 1; + + if ((y >> i) & 0x1) + { + if (r > UINT64CONST(0xffffffffffffffff) - x) + /*----- + * To compute (r + x) without overflow: transform to + * (r + x) % m, and then to (r + x - m). + * + * An example using one digit decimal number, where r = 6, x = 5 and + * m = 7, is shown. + * (6 + 5) % 7 = (6 + (5 - 7 + 7)) % 7 = 6 % 7 + (5 - 7) % 7 + 7 % 7 + * = 6 + (5 - 7) + 0 = 4. + * Generally, if (r + x) overflows, + * (r + x) % m = r + (x - m). + */ + r += x - m; + else + r += x; + } + + r %= m; + } + + return r; +} + +/* + * Donald Knuth's linear congruential generator + * + * Relying on multiplication overflows is part of the design + * of this simple pseudo random number generator. + */ +#define DK_LCG_MUL UINT64CONST(6364136223846793005) +#define DK_LCG_INC UINT64CONST(1442695040888963407) + +/* do not use all small bits */ +#define LCG_SHIFT 13 + +/* + * PRP: parametric pseudo-random permutation + * + * Result in [0, size) is a permutation for inputs in the same set. + * + * Note that this function does not pass statistical tests: eg + * permutations of 2, 3, 4 or 5 ints are not strictly equiprobable. + * Things worsen for large sizes as there are many more permutations + * (size!) than seeds to select them (2^64 < 21!). + * However it is inexpensive compared to an actual encryption function, + * and the quality is good enough to avoid trivial correlations on + * large sizes, which is the expected use case. + * + * THIS FUNCTION IS NOT CRYPTOGRAPHICALLY SECURE. + * DO NOT USE FOR SUCH PURPOSE. + */ +static int64 +pseudorandom_perm(const int64 data, const int64 isize, const int64 seed) +{ + /* computations are performed on unsigned values */ + uint64 size = (uint64) isize; + uint64 v = (uint64) data % size; + uint64 key = (uint64) seed; + /* size-1 ensures 2 possibly overlapping halves */ + uint64 mask = compute_mask(size - 1) >> 1; + + /* nothing to permute */ + if (isize == 1) + return 0; + + Assert(isize >= 2); + + /*----- + * Apply 4 rounds of bijective transformations using key updated + * at each stage: + * + * (1) scramble: partial xors on overlapping power-of-2 subsets + * for instance with v in 0 .. 14 (i.e. with size == 15): + * if v is in 0 .. 7 do v = (v ^ k) % 8 + * if v is in 7 .. 14 do v = 14 - ((14-v) ^ k) % 8 + * note that because of the overlap (here 7), v may be changed twice. + * this transformation if bijective because the condition to apply it + * is still true after applying it, and xor itself is bijective on a + * power-of-2 size. + * + * (2) scatter: linear modulo + * v = (v * p + k) % size + * this transformation is bijective is p & size are prime, which is + * ensured in the code by the while loop which discards primes when + * size is a multiple of it. + * + */ + for (unsigned int i = 0, p = key % PRP_PRIMES; i < PRP_ROUNDS; i++, p = (p + 1) % PRP_PRIMES) + { + uint64 t; + + /* first "half" whitening, for v in 0 .. mask */ + key = key * DK_LCG_MUL + DK_LCG_INC; + if (v <= mask) + v ^= (key >> LCG_SHIFT) & mask; + + /* second (possibly overlapping) "half" whitening */ + key = key * DK_LCG_MUL + DK_LCG_INC; + t = size - 1 - v; + if (t <= mask) + { + t ^= (key >> LCG_SHIFT) & mask; + v = size - 1 - t; + } + + /* at most 2 primes are skipped for a given size */ + while (unlikely(size % primes[p] == 0)) + p = (p + 1) % PRP_PRIMES; + + /* scatter values with a prime multiplication */ + key = key * DK_LCG_MUL + DK_LCG_INC; + + /* Performance shortcut for 24 bit primes, ok for size up to ~10E12 */ + if ((v & UINT64CONST(0xffffffffff)) == v) + v = (primes[p] * v + (key >> LCG_SHIFT)) % size; + else + /*----- + * Note: the add cannot overflow as size is under 63 bits: + * mmv = mm(prime, v, size) < size <= 0x7fffffffffffffff = (1<<63)-1 + * ks = key >> LCG_SHIFTS <= 2^51 + * => mmv + ks < (1<<64) - 1 + */ + v = (modular_multiply(primes[p], v, size) + (key >> LCG_SHIFT)) % size; + } + + /* back to signed */ + return (int64) v; +} + /* * Initialize the given SimpleStats struct to all zeroes */ @@ -2515,6 +2811,27 @@ evalStandardFunc(TState *thread, CState *st, return true; } + case PGBENCH_PRPERM: + { + int64 val, size, seed; + + Assert(nargs == 3); + + if (!coerceToInt(&vargs[0], &val) || + !coerceToInt(&vargs[1], &size) || + !coerceToInt(&vargs[2], &seed)) + return false; + + if (size < 1) + { + fprintf(stderr, "pr_perm size parameter must be >= 1\n"); + return false; + } + + setIntValue(retval, pseudorandom_perm(val, size, seed)); + return true; + } + default: /* cannot get here */ Assert(0); diff --git a/src/bin/pgbench/pgbench.h b/src/bin/pgbench/pgbench.h index dc557d416c..f9f6b38662 100644 --- a/src/bin/pgbench/pgbench.h +++ b/src/bin/pgbench/pgbench.h @@ -99,7 +99,8 @@ typedef enum PgBenchFunction PGBENCH_IS, PGBENCH_CASE, PGBENCH_HASH_FNV1A, - PGBENCH_HASH_MURMUR2 + PGBENCH_HASH_MURMUR2, + PGBENCH_PRPERM } PgBenchFunction; typedef struct PgBenchExpr PgBenchExpr; diff --git a/src/bin/pgbench/t/001_pgbench_with_server.pl b/src/bin/pgbench/t/001_pgbench_with_server.pl index ad15ba66ea..0ecbf07c6d 100644 --- a/src/bin/pgbench/t/001_pgbench_with_server.pl +++ b/src/bin/pgbench/t/001_pgbench_with_server.pl @@ -321,6 +321,17 @@ pgbench( qr{command=98.: int 5432\b}, # :random_seed qr{command=99.: int -9223372036854775808\b}, # min int qr{command=100.: int 9223372036854775807\b}, # max int + qr{command=101.: boolean true\b}, + qr{command=102.: boolean true\b}, + qr{command=103.: boolean true\b}, + qr{command=104.: boolean true\b}, + qr{command=105.: boolean true\b}, + qr{command=109.: boolean true\b}, + qr{command=110.: boolean true\b}, + qr{command=111.: boolean true\b}, + qr{command=112.: boolean true\b}, + qr{command=113.: int 9223372036854775797\b}, + qr{command=114.: boolean true\b}, ], 'pgbench expressions', { @@ -448,6 +459,37 @@ SELECT :v0, :v1, :v2, :v3; -- minint constant parsing \set min debug(-9223372036854775808) \set max debug(-(:min + 1)) +-- pseudo-random permutation +\set t debug(pr_perm(0, 2) + pr_perm(1, 2) = 1) +\set t debug(pr_perm(0, 3) + pr_perm(1, 3) + pr_perm(2, 3) = 3) +\set t debug(pr_perm(0, 4) + pr_perm(1, 4) + pr_perm(2, 4) + pr_perm(3, 4) = 6) +\set t debug(pr_perm(0, 5) + pr_perm(1, 5) + pr_perm(2, 5) + pr_perm(3, 5) + pr_perm(4, 5) = 10) +\set t debug(pr_perm(0, 16) + pr_perm(1, 16) + pr_perm(2, 16) + pr_perm(3, 16) + \ + pr_perm(4, 16) + pr_perm(5, 16) + pr_perm(6, 16) + pr_perm(7, 16) + \ + pr_perm(8, 16) + pr_perm(9, 16) + pr_perm(10, 16) + pr_perm(11, 16) + \ + pr_perm(12, 16) + pr_perm(13, 16) + pr_perm(14, 16) + pr_perm(15, 16) = 120) +-- random sanity check +\set size random(2, 1000) +\set v random(0, :size - 1) +\set p pr_perm(:v, :size) +\set t debug(0 <= :p and :p < :size and :p = pr_perm(:v + :size, :size) and :p <> pr_perm(:v + 1, :size)) +-- actual values +\set t debug(pr_perm(:v, 1) = 0) +\set t debug(pr_perm(0, 2, 5432) = 0 and pr_perm(1, 2, 5432) = 1 and \ + pr_perm(0, 2, 5431) = 1 and pr_perm(1, 2, 5431) = 0) +-- ~50 bits test to exercise full modular multiplication +\set t debug(pr_perm(0, 1000000000000000, 54321) = 9406454989259 and \ + pr_perm(1999999999999999, 1000000000000000, 54321) = 54570773902028 and \ + pr_perm(2500000000000000, 1000000000000000, 54321) = 771082311307468) +-- 63 bits tests +\set size debug(:max - 10) +\set ok debug(pr_perm(:size-1, :size, 5432) = 7214172101785397543 and \ + pr_perm(:size-2, :size, 5432) = 4060085360303920649 and \ + pr_perm(:size-3, :size, 5432) = 919477102797071521 and \ + pr_perm(:size-4, :size, 5432) = 7588404289602340497 and \ + pr_perm(:size-5, :size, 5432) = 5568354808723584469 and \ + pr_perm(:size-6, :size, 5432) = 2410458883211907565 and \ + pr_perm(:size-7, :size, 5432) = 1738667467693569814) } }); @@ -779,6 +821,8 @@ SELECT LEAST(:i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i); [ 'misc empty script', 1, [qr{empty command list for script}], q{} ], [ 'bad boolean', 2, [qr{malformed variable.*trueXXX}], q{\set b :badtrue or true} ], + [ 'invalid pr_perm size', 2, + [qr{pr_perm size parameter must be >= 1}], q{\set i pr_perm(0, 0)}], # GSET & CSET [ 'gset no row', 2, diff --git a/src/bin/pgbench/t/002_pgbench_no_server.pl b/src/bin/pgbench/t/002_pgbench_no_server.pl index 69a6d03bb3..0acbb7fbbb 100644 --- a/src/bin/pgbench/t/002_pgbench_no_server.pl +++ b/src/bin/pgbench/t/002_pgbench_no_server.pl @@ -306,6 +306,16 @@ my @script_tests = ( 'double overflow 3', [qr{double constant overflow}], { 'overflow-3.sql' => "\\set d .1E310\n" } + ], + [ + 'not enough arguments for pr_perm', + [qr{unexpected number of arguments \(pr_perm\)}], + { 'bad-pr_perm-1.sql' => "\\set i pr_perm(1)\n" } + ], + [ + 'too many arguments for pr_perm', + [qr{unexpected number of arguments \(pr_perm\)}], + { 'bad-pr_perm-2.sql' => "\\set i pr_perm(1, 2, 3, 4)\n" } ],); for my $t (@script_tests)