Attached v19 is a rebase, per cfbot.
Attached v20 fixes a doc xml typo, per cfbot again.
--
Fabien.
diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml
index 9f3bb5fce6..d4a604c6fa 100644
--- a/doc/src/sgml/ref/pgbench.sgml
+++ b/doc/src/sgml/ref/pgbench.sgml
@@ -1033,7 +1033,7 @@ pgbench <optional> <replaceable>options</replaceable> </optional> <replaceable>d
<row>
<entry> <literal>default_seed</literal> </entry>
- <entry>seed used in hash functions by default</entry>
+ <entry>seed used in hash and pseudo-random permutation functions by default</entry>
</row>
<row>
@@ -1850,6 +1850,20 @@ SELECT 4 AS four \; SELECT 5 AS five \aset
</para></entry>
</row>
+ <row>
+ <entry role="func_table_entry"><para role="func_signature">
+ <function>pr_perm</function> ( <parameter>i</parameter>, <parameter>size</parameter> [, <parameter>seed</parameter> ] )
+ <returnvalue>integer</returnvalue>
+ </para>
+ <para>
+ pseudo-random permutation in <literal>[0,size)</literal>
+ </para>
+ <para>
+ <literal>pr_perm(0, 4)</literal>
+ <returnvalue>an integer between 0 and 3</returnvalue>
+ </para></entry>
+ </row>
+
<row>
<entry role="func_table_entry"><para role="func_signature">
<function>random</function> ( <parameter>lb</parameter>, <parameter>ub</parameter> )
@@ -1936,6 +1950,20 @@ SELECT 4 AS four \; SELECT 5 AS five \aset
shape of the distribution.
</para>
+ <note>
+ <para>
+ When designing a benchmark which selects rows non-uniformly, be aware
+ that using <application>pgbench</application> non-uniform random functions
+ directly leads to unduly correlations: rows with close ids come out with
+ similar probability, skewing performance measures because they also reside
+ in close pages.
+ </para>
+ <para>
+ You must use <function>pr_perm</function> to shuffle the selected ids, or
+ some other additional step with similar effect, to avoid these skewing impacts.
+ </para>
+ </note>
+
<itemizedlist>
<listitem>
<para>
@@ -2030,24 +2058,54 @@ f(x) = PHI(2.0 * parameter * (x - mu) / (max - min + 1)) /
<literal>hash_fnv1a</literal> accept an input value and an optional seed parameter.
In case the seed isn't provided the value of <literal>:default_seed</literal>
is used, which is initialized randomly unless set by the command-line
- <literal>-D</literal> option. Hash functions can be used to scatter the
- distribution of random functions such as <literal>random_zipfian</literal> or
- <literal>random_exponential</literal>. For instance, the following pgbench
- script simulates possible real world workload typical for social media and
+ <literal>-D</literal> option.
+ </para>
+
+ <para>
+ Function <literal>pr_perm</literal> implements a pseudo-random permutation
+ on an arbitrary size.
+ It allows to shuffle output of non uniform random functions so that
+ values drawn more often are not trivially correlated.
+ It permutes integers in [0, size) using a seed by applying rounds of
+ simple invertible functions, similarly to an encryption function,
+ although beware that it is not at all cryptographically secure.
+ Compared to <literal>hash</literal> functions discussed above, the function
+ ensures that a perfect permutation is applied: there are neither collisions
+ nor holes in the output values.
+ Values outside the interval are interpreted modulo the size.
+ The function raises an error if size is not positive.
+ If no seed is provided, <literal>:default_seed</literal> is used.
+
+ For instance, the following <application>pgbench</application> script
+ simulates possible real world workload typical for social media and
blogging platforms where few accounts generate excessive load:
<programlisting>
-\set r random_zipfian(0, 100000000, 1.07)
-\set k abs(hash(:r)) % 1000000
+\set size 1000000
+\set r random_zipfian(1, :size, 1.07)
+\set k 1 + pr_perm(:r, :size)
</programlisting>
In some cases several distinct distributions are needed which don't correlate
with each other and this is when implicit seed parameter comes in handy:
<programlisting>
-\set k1 abs(hash(:r, :default_seed + 123)) % 1000000
-\set k2 abs(hash(:r, :default_seed + 321)) % 1000000
+\set k1 1 + pr_perm(:r, :size, :default_seed + 123)
+\set k2 1 + pr_perm(:r, :size, :default_seed + 321)
</programlisting>
+
+ An similar behavior can also be approximated with
+ <function>hash</function>:
+
+<programlisting>
+\set size 1000000
+\set r random_zipfian(1, 100 * :size, 1.07)
+\set k 1 + abs(hash(:r)) % :size
+</programlisting>
+
+ As this hash-modulo version generates collisions, some
+ <literal>id</literal> would not be reachable and others would come more often
+ than the target distribution.
</para>
<para>
diff --git a/src/bin/pgbench/exprparse.y b/src/bin/pgbench/exprparse.y
index 85d61caa9f..0ea968c94b 100644
--- a/src/bin/pgbench/exprparse.y
+++ b/src/bin/pgbench/exprparse.y
@@ -19,6 +19,7 @@
#define PGBENCH_NARGS_VARIABLE (-1)
#define PGBENCH_NARGS_CASE (-2)
#define PGBENCH_NARGS_HASH (-3)
+#define PGBENCH_NARGS_PRPERM (-4)
PgBenchExpr *expr_parse_result;
@@ -370,6 +371,9 @@ static const struct
{
"hash_fnv1a", PGBENCH_NARGS_HASH, PGBENCH_HASH_FNV1A
},
+ {
+ "pr_perm", PGBENCH_NARGS_PRPERM, PGBENCH_PRPERM
+ },
/* keep as last array element */
{
NULL, 0, 0
@@ -482,6 +486,19 @@ make_func(yyscan_t yyscanner, int fnumber, PgBenchExprList *args)
}
break;
+ /* pseudo-random permutation function with optional seed argument */
+ case PGBENCH_NARGS_PRPERM:
+ if (len < 2 || len > 3)
+ expr_yyerror_more(yyscanner, "unexpected number of arguments",
+ PGBENCH_FUNCTIONS[fnumber].fname);
+
+ if (len == 2)
+ {
+ PgBenchExpr *var = make_variable("default_seed");
+ args = make_elist(var, args);
+ }
+ break;
+
/* common case: positive arguments number */
default:
Assert(PGBENCH_FUNCTIONS[fnumber].nargs >= 0);
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index 08a5947a9e..287e95078f 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -32,6 +32,13 @@
#endif
#include "postgres_fe.h"
+#include "common/int.h"
+#include "common/logging.h"
+#include "fe_utils/conditional.h"
+#include "getopt_long.h"
+#include "libpq-fe.h"
+#include "portability/instr_time.h"
+#include "port/pg_bitutils.h"
#include <ctype.h>
#include <float.h>
@@ -1061,6 +1068,307 @@ getHashMurmur2(int64 val, uint64 seed)
return (int64) result;
}
+/* pseudo-random permutation */
+
+/* 16 so that % 16 can be optimized to & 0x0f */
+#define PRP_PRIMES 16
+/*
+ * 24 bit mega primes from https://primes.utm.edu/lists/small/millions/
+ * the i-th prime, i \in [0, 15], is the first prime above 2^23 + i * 2^19
+ */
+static uint64 primes[PRP_PRIMES] = {
+ UINT64CONST(8388617),
+ UINT64CONST(8912921),
+ UINT64CONST(9437189),
+ UINT64CONST(9961487),
+ UINT64CONST(10485767),
+ UINT64CONST(11010059),
+ UINT64CONST(11534351),
+ UINT64CONST(12058679),
+ UINT64CONST(12582917),
+ UINT64CONST(13107229),
+ UINT64CONST(13631489),
+ UINT64CONST(14155777),
+ UINT64CONST(14680067),
+ UINT64CONST(15204391),
+ UINT64CONST(15728681),
+ UINT64CONST(16252967)
+};
+
+/* how many "encryption" rounds to apply */
+#define PRP_ROUNDS 4
+
+/* return smallest mask holding n */
+static uint64
+compute_mask(uint64 n)
+{
+ n |= n >> 1;
+ n |= n >> 2;
+ n |= n >> 4;
+ n |= n >> 8;
+ n |= n >> 16;
+ n |= n >> 32;
+ return n;
+}
+
+#if !defined(PG_INT128_TYPE)
+/* length of n binary representation */
+static int
+nbits(uint64 n)
+{
+ /* set lower bits to 1 and count them */
+ return pg_popcount64(compute_mask(n));
+}
+#endif
+
+/*
+ * Compute (x * y) % m, where x and y in [0, 2^64), m in [1, 2^64).
+ *
+ * Use improved interleaved modular multiplication algorithm to avoid
+ * overflows when necessary.
+ */
+static uint64
+modular_multiply(uint64 x, uint64 y, const uint64 m)
+{
+#if defined(PG_INT128_TYPE)
+ return (PG_INT128_TYPE) x * (PG_INT128_TYPE) y % (PG_INT128_TYPE) m;
+#else
+ int i,
+ bits;
+ uint64 r;
+
+ Assert(m >= 1);
+
+ /* Because of (x * y) % m = (x % m * y % m) % m */
+ if (x >= m)
+ x %= m;
+ if (y >= m)
+ y %= m;
+
+ /* Return the trivial result. */
+ if (x == 0 || y == 0 || m == 1)
+ return 0;
+
+ /* Return the result if (x * y) can be multiplied without overflow. */
+ if (nbits(x) + nbits(y) <= 64)
+ return (x * y) % m;
+
+ /* To reduce the for loop in the algorithm below, ensure y <= x. */
+ if (x < y)
+ {
+ uint64 tmp = x;
+
+ x = y;
+ y = tmp;
+ }
+
+ /*-----
+ * Interleaved modular multiplication algorithm from:
+ *
+ * D.N. Amanor et al., "Efficient hardware architecture for modular
+ * multiplication on FPGAs", in International Conference on Field
+ * Programmable Logic and Applications, Aug 2005, pp. 539-542.
+ *
+ * This algorithm is usually used in the field of digital circuit design.
+ *
+ * Input: X, Y, M; 0 <= X, Y <= M;
+ * Output: R = X * Y mod M;
+ * bits: number of bits of Y
+ * Y[i]: i th bit of Y
+ *
+ * 1. R = 0;
+ * 2. for (i = bits - 1; i >= 0; i--) {
+ * 3. R = 2 * R;
+ * 4. if (Y[i] == 0x1)
+ * 5. R += X;
+ * 6. if (R >= M) R -= M;
+ * 7. if (R >= M) R -= M;
+ * }
+ *
+ * In Steps 3 and 5, overflow should be avoided. The details are explained
+ * in each step.
+ * Steps 6 and 7 can be instead a modular operation (R %= M).
+ *
+ * Note that, in this implementation, the distributive property of modular
+ * operation shown in below is used whenever it can be applied.
+ * (X * Y) % M = X % M * Y % M, (X + Y) % M = X % M + Y % M
+ *
+ * For ease of understanding, an example is shown.
+ * Example: (11 * 5) % 7
+ *
+ * [Notation] ":=" means substitution.
+ *
+ * [initial value]
+ * R := 0x0000
+ *
+ * [i=2]
+ * R := (0x0000 * 2) % 0x0111 = 0x0000
+ * R := (R + 0x1011) % 0x0111 = 0x0100
+ *
+ * [i=1]
+ * R := (0x0100 * 2) % 0x0111 = 0x1000 % 0x0111 = 0x0001
+ *
+ * [i=0]
+ * R := (0x0001 * 2) % 0x0111 = 0x0010
+ * R := (R + 0x1011) % 0x0111 = 0x1101 % 0x0111 = 0x0110
+ *
+ * [result]
+ * R = 6
+ */
+
+ bits = nbits(y);
+ r = 0;
+
+ for (i = bits - 1; i >= 0; i--)
+ {
+ if (r > UINT64CONST(0x7fffffffffffffff))
+ /*-----
+ * To avoid overflow, transform from (2 * r) to (2 * r) % m, and
+ * further transform to mathematically equivalent form below:
+ *
+ * For ease of understanding, an example using one digit decimal number,
+ * where r = 6 and m = 7, is shown.
+ * (6 * 2) % 7 = (7 - 1) * 2 % 7 = (7 % 7 - 1 % 7) * 2 % 7
+ * = (0 - 1) * 2 % 7 = -2 % 7 = 7 - 2 = 5.
+ * Generally, if (r * 2) overflows,
+ * (r * 2) % m = m - (m - r) * 2.
+ */
+ r = m - ((m - r) << 1);
+ else
+ r <<= 1;
+
+ if ((y >> i) & 0x1)
+ {
+ if (r > UINT64CONST(0xffffffffffffffff) - x)
+ /*-----
+ * To compute (r + x) without overflow: transform to
+ * (r + x) % m, and then to (r + x - m).
+ *
+ * An example using one digit decimal number, where r = 6, x = 5 and
+ * m = 7, is shown.
+ * (6 + 5) % 7 = (6 + (5 - 7 + 7)) % 7 = 6 % 7 + (5 - 7) % 7 + 7 % 7
+ * = 6 + (5 - 7) + 0 = 4.
+ * Generally, if (r + x) overflows,
+ * (r + x) % m = r + (x - m).
+ */
+ r += x - m;
+ else
+ r += x;
+ }
+
+ r %= m;
+ }
+
+ return r;
+#endif
+}
+
+/*
+ * Donald Knuth's linear congruential generator
+ *
+ * Relying on multiplication overflows is part of the design
+ * of this simple pseudo random number generator.
+ */
+#define DK_LCG_MUL UINT64CONST(6364136223846793005)
+#define DK_LCG_INC UINT64CONST(1442695040888963407)
+
+/* do not use all small bits */
+#define LCG_SHIFT 13
+
+/*
+ * PRP: parametric pseudo-random permutation
+ *
+ * Result in [0, size) is a permutation for inputs in the same set.
+ *
+ * Note that this function does not pass statistical tests: eg
+ * permutations of 2, 3, 4 or 5 ints are not strictly equiprobable.
+ * Things worsen for large sizes as there are many more permutations
+ * (size!) than seeds to select them (2^64 < 21!).
+ * However it is inexpensive compared to an actual encryption function,
+ * and the quality is good enough to avoid trivial correlations on
+ * large sizes, which is the expected use case.
+ *
+ * THIS FUNCTION IS NOT CRYPTOGRAPHICALLY SECURE.
+ * DO NOT USE FOR SUCH PURPOSE.
+ */
+static int64
+pseudorandom_perm(const int64 data, const int64 isize, const int64 seed)
+{
+ /* computations are performed on unsigned values */
+ uint64 size = (uint64) isize;
+ uint64 v = (uint64) data % size;
+ uint64 key = (uint64) seed;
+ /* size-1 ensures 2 possibly overlapping halves */
+ uint64 mask = compute_mask(size - 1) >> 1;
+
+ /* nothing to permute */
+ if (isize == 1)
+ return 0;
+
+ Assert(isize >= 2);
+
+ /*-----
+ * Apply 4 rounds of bijective transformations using key updated
+ * at each stage:
+ *
+ * (1) scramble: partial xors on overlapping power-of-2 subsets
+ * for instance with v in 0 .. 14 (i.e. with size == 15):
+ * if v is in 0 .. 7 do v = (v ^ k) % 8
+ * if v is in 7 .. 14 do v = 14 - ((14-v) ^ k) % 8
+ * note that because of the overlap (here 7), v may be changed twice.
+ * this transformation if bijective because the condition to apply it
+ * is still true after applying it, and xor itself is bijective on a
+ * power-of-2 size.
+ *
+ * (2) scatter: linear modulo
+ * v = (v * p + k) % size
+ * this transformation is bijective is p & size are prime, which is
+ * ensured in the code by the while loop which discards primes when
+ * size is a multiple of it.
+ *
+ */
+ for (unsigned int i = 0, p = key % PRP_PRIMES; i < PRP_ROUNDS; i++, p = (p + 1) % PRP_PRIMES)
+ {
+ uint64 t;
+
+ /* first "half" whitening, for v in 0 .. mask */
+ key = key * DK_LCG_MUL + DK_LCG_INC;
+ if (v <= mask)
+ v ^= (key >> LCG_SHIFT) & mask;
+
+ /* second (possibly overlapping) "half" whitening */
+ key = key * DK_LCG_MUL + DK_LCG_INC;
+ t = size - 1 - v;
+ if (t <= mask)
+ {
+ t ^= (key >> LCG_SHIFT) & mask;
+ v = size - 1 - t;
+ }
+
+ /* at most 2 primes are skipped for a given size */
+ while (unlikely(size % primes[p] == 0))
+ p = (p + 1) % PRP_PRIMES;
+
+ /* scatter values with a prime multiplication */
+ key = key * DK_LCG_MUL + DK_LCG_INC;
+
+ /* Performance shortcut for 24 bit primes, ok for size up to ~10E12 */
+ if ((v & UINT64CONST(0xffffffffff)) == v)
+ v = (primes[p] * v + (key >> LCG_SHIFT)) % size;
+ else
+ /*-----
+ * Note: the add cannot overflow as size is under 63 bits:
+ * mmv = mm(prime, v, size) < size <= 0x7fffffffffffffff = (1<<63)-1
+ * ks = key >> LCG_SHIFTS <= 2^51
+ * => mmv + ks < (1<<64) - 1
+ */
+ v = (modular_multiply(primes[p], v, size) + (key >> LCG_SHIFT)) % size;
+ }
+
+ /* back to signed */
+ return (int64) v;
+}
+
/*
* Initialize the given SimpleStats struct to all zeroes
*/
@@ -2398,6 +2706,27 @@ evalStandardFunc(CState *st,
return true;
}
+ case PGBENCH_PRPERM:
+ {
+ int64 val, size, seed;
+
+ Assert(nargs == 3);
+
+ if (!coerceToInt(&vargs[0], &val) ||
+ !coerceToInt(&vargs[1], &size) ||
+ !coerceToInt(&vargs[2], &seed))
+ return false;
+
+ if (size < 1)
+ {
+ fprintf(stderr, "pr_perm size parameter must be >= 1\n");
+ return false;
+ }
+
+ setIntValue(retval, pseudorandom_perm(val, size, seed));
+ return true;
+ }
+
default:
/* cannot get here */
Assert(0);
diff --git a/src/bin/pgbench/pgbench.h b/src/bin/pgbench/pgbench.h
index fb2c34f512..a95095a27d 100644
--- a/src/bin/pgbench/pgbench.h
+++ b/src/bin/pgbench/pgbench.h
@@ -99,7 +99,8 @@ typedef enum PgBenchFunction
PGBENCH_IS,
PGBENCH_CASE,
PGBENCH_HASH_FNV1A,
- PGBENCH_HASH_MURMUR2
+ PGBENCH_HASH_MURMUR2,
+ PGBENCH_PRPERM
} PgBenchFunction;
typedef struct PgBenchExpr PgBenchExpr;
diff --git a/src/bin/pgbench/t/001_pgbench_with_server.pl b/src/bin/pgbench/t/001_pgbench_with_server.pl
index 52009c3524..7d4c51ab21 100644
--- a/src/bin/pgbench/t/001_pgbench_with_server.pl
+++ b/src/bin/pgbench/t/001_pgbench_with_server.pl
@@ -467,6 +467,17 @@ pgbench(
qr{command=98.: int 5432\b}, # :random_seed
qr{command=99.: int -9223372036854775808\b}, # min int
qr{command=100.: int 9223372036854775807\b}, # max int
+ qr{command=101.: boolean true\b},
+ qr{command=102.: boolean true\b},
+ qr{command=103.: boolean true\b},
+ qr{command=104.: boolean true\b},
+ qr{command=105.: boolean true\b},
+ qr{command=109.: boolean true\b},
+ qr{command=110.: boolean true\b},
+ qr{command=111.: boolean true\b},
+ qr{command=112.: boolean true\b},
+ qr{command=113.: int 9223372036854775797\b},
+ qr{command=114.: boolean true\b},
],
'pgbench expressions',
{
@@ -594,6 +605,37 @@ SELECT :v0, :v1, :v2, :v3;
-- minint constant parsing
\set min debug(-9223372036854775808)
\set max debug(-(:min + 1))
+-- pseudo-random permutation
+\set t debug(pr_perm(0, 2) + pr_perm(1, 2) = 1)
+\set t debug(pr_perm(0, 3) + pr_perm(1, 3) + pr_perm(2, 3) = 3)
+\set t debug(pr_perm(0, 4) + pr_perm(1, 4) + pr_perm(2, 4) + pr_perm(3, 4) = 6)
+\set t debug(pr_perm(0, 5) + pr_perm(1, 5) + pr_perm(2, 5) + pr_perm(3, 5) + pr_perm(4, 5) = 10)
+\set t debug(pr_perm(0, 16) + pr_perm(1, 16) + pr_perm(2, 16) + pr_perm(3, 16) + \
+ pr_perm(4, 16) + pr_perm(5, 16) + pr_perm(6, 16) + pr_perm(7, 16) + \
+ pr_perm(8, 16) + pr_perm(9, 16) + pr_perm(10, 16) + pr_perm(11, 16) + \
+ pr_perm(12, 16) + pr_perm(13, 16) + pr_perm(14, 16) + pr_perm(15, 16) = 120)
+-- random sanity check
+\set size random(2, 1000)
+\set v random(0, :size - 1)
+\set p pr_perm(:v, :size)
+\set t debug(0 <= :p and :p < :size and :p = pr_perm(:v + :size, :size) and :p <> pr_perm(:v + 1, :size))
+-- actual values
+\set t debug(pr_perm(:v, 1) = 0)
+\set t debug(pr_perm(0, 2, 5432) = 0 and pr_perm(1, 2, 5432) = 1 and \
+ pr_perm(0, 2, 5431) = 1 and pr_perm(1, 2, 5431) = 0)
+-- ~50 bits test to exercise full modular multiplication
+\set t debug(pr_perm(0, 1000000000000000, 54321) = 9406454989259 and \
+ pr_perm(1999999999999999, 1000000000000000, 54321) = 54570773902028 and \
+ pr_perm(2500000000000000, 1000000000000000, 54321) = 771082311307468)
+-- 63 bits tests
+\set size debug(:max - 10)
+\set ok debug(pr_perm(:size-1, :size, 5432) = 7214172101785397543 and \
+ pr_perm(:size-2, :size, 5432) = 4060085360303920649 and \
+ pr_perm(:size-3, :size, 5432) = 919477102797071521 and \
+ pr_perm(:size-4, :size, 5432) = 7588404289602340497 and \
+ pr_perm(:size-5, :size, 5432) = 5568354808723584469 and \
+ pr_perm(:size-6, :size, 5432) = 2410458883211907565 and \
+ pr_perm(:size-7, :size, 5432) = 1738667467693569814)
}
});
@@ -955,6 +997,10 @@ SELECT LEAST(} . join(', ', (':i') x 256) . q{)}
'bad boolean', 2,
[qr{malformed variable.*trueXXX}], q{\set b :badtrue or true}
],
+ [
+ 'invalid pr_perm size', 2,
+ [qr{pr_perm size parameter must be >= 1}], q{\set i pr_perm(0, 0)}
+ ],
# GSET
[
diff --git a/src/bin/pgbench/t/002_pgbench_no_server.pl b/src/bin/pgbench/t/002_pgbench_no_server.pl
index e38c7d77d1..c1a9f7f403 100644
--- a/src/bin/pgbench/t/002_pgbench_no_server.pl
+++ b/src/bin/pgbench/t/002_pgbench_no_server.pl
@@ -341,6 +341,16 @@ my @script_tests = (
'set i',
[ qr{set i 1 }, qr{\^ error found here} ],
{ 'set_i_op' => "\\set i 1 +\n" }
+ ],
+ [
+ 'not enough arguments for pr_perm',
+ [qr{unexpected number of arguments \(pr_perm\)}],
+ { 'bad-pr_perm-1.sql' => "\\set i pr_perm(1)\n" }
+ ],
+ [
+ 'too many arguments for pr_perm',
+ [qr{unexpected number of arguments \(pr_perm\)}],
+ { 'bad-pr_perm-2.sql' => "\\set i pr_perm(1, 2, 3, 4)\n" }
],);
for my $t (@script_tests)