Hello Hironobu-san,
I reviewed pgbench-prp-func-6.patch.
Thanks.
(1) modular_multiply()
In modular_multiply(), the numbers of digits of inputs are checked in order
to determine using the interleaved modular multiplication algorithm or not.
However, the check condition absolutely depends on the implementation of
pseudorandom_perm() even though modular_multiply() is a general function.
I think it is better that pseudorandom_perm() directly checks the condition
because it can be worked efficiently and modular_multiply() can be used in
general purpose.
You moved the shortcut to the caller. Why not, it removes one modulo
operation and avoids the call altogether.
(2) pseudorandom_perm() and 001_pgbench_with_server.pl
Reading the discussion of __builtin_xxx functions, I remembered another
overflow issue.
pseudorandom_perm() uses the Donald Knuth's linear congruential generator
method to obtain pseudo-random numbers. This method, not only this but also
all linear congruential generator methods, always overflows.
If we do not need to guarantee the portability of this code,
ISTM that we do not need such changes: the code is already portable
because standard C unsigned operations overflow consistently and this is
what it really expected for the linear congruential generator.
If an alternate implementation should be provided, given the heavy cost of
the modular multiplication function, it would be only for those
architectures which fails, detected at configure time. I would not go into
this without an actual failing architecture & C compiler.
Also, the alternate implementation should not change the result, so
something looks amiss in your version. Moreover, I'm unclear how to
implement an overflow multiply with the safe no overflow version.
Here is a v8 which:
- moves the shortcut to the caller
- changes the r formula as you did
- does nothing about Knuth's formula, as nothing should be needed
- fixes tests after Peter's exit status changes
--
Fabien.
diff --git a/config/c-compiler.m4 b/config/c-compiler.m4
index af2dea1c2a..5b09f73d26 100644
--- a/config/c-compiler.m4
+++ b/config/c-compiler.m4
@@ -327,6 +327,26 @@ fi])# PGAC_C_BUILTIN_BSWAP64
+# PGAC_C_BUILTIN_CLZLL
+# -------------------------
+# Check if the C compiler understands __builtin_clzll(),
+# and define HAVE__BUILTIN_CLZLL if so.
+# Both GCC & CLANG seem to have one.
+AC_DEFUN([PGAC_C_BUILTIN_CLZLL],
+[AC_CACHE_CHECK(for __builtin_clzll, pgac_cv__builtin_clzll,
+[AC_COMPILE_IFELSE([AC_LANG_SOURCE(
+[static unsigned long int x = __builtin_clzll(0xaabbccddeeff0011);]
+)],
+[pgac_cv__builtin_clzll=yes],
+[pgac_cv__builtin_clzll=no])])
+if test x"$pgac_cv__builtin_clzll" = xyes ; then
+AC_DEFINE(HAVE__BUILTIN_CLZLL, 1,
+ [Define to 1 if your compiler understands __builtin_clzll.])
+fi])# PGAC_C_BUILTIN_CLZLL
+
+
+
+
# PGAC_C_BUILTIN_CONSTANT_P
# -------------------------
# Check if the C compiler understands __builtin_constant_p(),
diff --git a/configure b/configure
index b7250d7f5b..a6dc740fee 100755
--- a/configure
+++ b/configure
@@ -13950,6 +13950,30 @@ if test x"$pgac_cv__builtin_bswap64" = xyes ; then
$as_echo "#define HAVE__BUILTIN_BSWAP64 1" >>confdefs.h
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_clzll" >&5
+$as_echo_n "checking for __builtin_clzll... " >&6; }
+if ${pgac_cv__builtin_clzll+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+static unsigned long int x = __builtin_clzll(0xaabbccddeeff0011);
+
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ pgac_cv__builtin_clzll=yes
+else
+ pgac_cv__builtin_clzll=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__builtin_clzll" >&5
+$as_echo "$pgac_cv__builtin_clzll" >&6; }
+if test x"$pgac_cv__builtin_clzll" = xyes ; then
+
+$as_echo "#define HAVE__BUILTIN_CLZLL 1" >>confdefs.h
+
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_constant_p" >&5
$as_echo_n "checking for __builtin_constant_p... " >&6; }
diff --git a/configure.in b/configure.in
index de5f777333..e872875293 100644
--- a/configure.in
+++ b/configure.in
@@ -1485,6 +1485,7 @@ PGAC_C_TYPES_COMPATIBLE
PGAC_C_BUILTIN_BSWAP16
PGAC_C_BUILTIN_BSWAP32
PGAC_C_BUILTIN_BSWAP64
+PGAC_C_BUILTIN_CLZLL
PGAC_C_BUILTIN_CONSTANT_P
PGAC_C_BUILTIN_UNREACHABLE
PGAC_C_COMPUTED_GOTO
diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml
index b5e3a62a33..77cbfcd097 100644
--- a/doc/src/sgml/ref/pgbench.sgml
+++ b/doc/src/sgml/ref/pgbench.sgml
@@ -929,7 +929,7 @@ pgbench <optional> <replaceable>options</replaceable> </optional> <replaceable>d
<row>
<entry> <literal>default_seed</literal> </entry>
- <entry>seed used in hash functions by default</entry>
+ <entry>seed used in hash and pseudo-random permutation functions by default</entry>
</row>
<row>
@@ -1389,6 +1389,13 @@ pgbench <optional> <replaceable>options</replaceable> </optional> <replaceable>d
<entry><literal>pow(2.0, 10)</literal>, <literal>power(2.0, 10)</literal></entry>
<entry><literal>1024.0</literal></entry>
</row>
+ <row>
+ <entry><literal><function>pr_perm(<replaceable>i</replaceable>, <replaceable>size</replaceable> [, <replaceable>seed</replaceable> ] )</function></literal></entry>
+ <entry>integer</entry>
+ <entry>pseudo-random permutation in [0,size)</entry>
+ <entry><literal>pr_perm(0, 4)</literal></entry>
+ <entry><literal>0</literal>, <literal>1</literal>, <literal>2</literal> or <literal>3</literal></entry>
+ </row>
<row>
<entry><literal><function>random(<replaceable>lb</replaceable>, <replaceable>ub</replaceable>)</function></literal></entry>
<entry>integer</entry>
@@ -1550,6 +1557,24 @@ f(x) = PHI(2.0 * parameter * (x - mu) / (max - min + 1)) /
</programlisting>
</para>
+ <para>
+ Function <literal>pr_perm</literal> implements a pseudo-random permutation.
+ It allows to mix the output of non uniform random functions so that
+ values drawn more often are not trivially correlated.
+ It permutes integers in [0, size) using a seed by applying rounds of
+ simple invertible functions, similarly to an encryption function,
+ although beware that it is not at all cryptographically secure.
+ Compared to <literal>hash</literal> functions discussed above, the function
+ ensures that a perfect permutation is applied: there are no collisions
+ nor holes in the output values.
+ Values outside the interval are interpreted modulo the size.
+ The function errors if size is not positive.
+ If no seed is provided, <literal>:default_seed</literal> is used.
+ For a given size and seed, the function is fully deterministic: if two
+ permutations on the same size must not be correlated, use distinct seeds
+ as outlined in the previous example about hash functions.
+ </para>
+
<para>
As an example, the full definition of the built-in TPC-B-like
transaction is:
diff --git a/src/bin/pgbench/exprparse.y b/src/bin/pgbench/exprparse.y
index bab6f8a95c..94fcebb77a 100644
--- a/src/bin/pgbench/exprparse.y
+++ b/src/bin/pgbench/exprparse.y
@@ -19,6 +19,7 @@
#define PGBENCH_NARGS_VARIABLE (-1)
#define PGBENCH_NARGS_CASE (-2)
#define PGBENCH_NARGS_HASH (-3)
+#define PGBENCH_NARGS_PRPERM (-4)
PgBenchExpr *expr_parse_result;
@@ -370,6 +371,9 @@ static const struct
{
"hash_fnv1a", PGBENCH_NARGS_HASH, PGBENCH_HASH_FNV1A
},
+ {
+ "pr_perm", PGBENCH_NARGS_PRPERM, PGBENCH_PRPERM
+ },
/* keep as last array element */
{
NULL, 0, 0
@@ -482,6 +486,19 @@ make_func(yyscan_t yyscanner, int fnumber, PgBenchExprList *args)
}
break;
+ /* pseudo-random permutation function with optional seed argument */
+ case PGBENCH_NARGS_PRPERM:
+ if (len < 2 || len > 3)
+ expr_yyerror_more(yyscanner, "unexpected number of arguments",
+ PGBENCH_FUNCTIONS[fnumber].fname);
+
+ if (len == 2)
+ {
+ PgBenchExpr *var = make_variable("default_seed");
+ args = make_elist(var, args);
+ }
+ break;
+
/* common case: positive arguments number */
default:
Assert(PGBENCH_FUNCTIONS[fnumber].nargs >= 0);
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index 81bc6d8a6e..e216e041f2 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -1066,6 +1066,243 @@ getHashMurmur2(int64 val, uint64 seed)
return (int64) result;
}
+/* pseudo-random permutation */
+
+/* 16 so that % 16 can be optimized to & 0x0f */
+#define PRP_PRIMES 16
+/*
+ * 24 bits mega primes from https://primes.utm.edu/lists/small/millions/
+ * the i-th prime, i \in [0, 15], is the first prime above 2^23 + i * 2^19
+ */
+static uint64 primes[PRP_PRIMES] = {
+ UINT64CONST(8388617),
+ UINT64CONST(8912921),
+ UINT64CONST(9437189),
+ UINT64CONST(9961487),
+ UINT64CONST(10485767),
+ UINT64CONST(11010059),
+ UINT64CONST(11534351),
+ UINT64CONST(12058679),
+ UINT64CONST(12582917),
+ UINT64CONST(13107229),
+ UINT64CONST(13631489),
+ UINT64CONST(14155777),
+ UINT64CONST(14680067),
+ UINT64CONST(15204391),
+ UINT64CONST(15728681),
+ UINT64CONST(16252967)
+};
+
+/* how many "encryption" rounds to apply */
+#define PRP_ROUNDS 4
+
+/* return smallest mask holding n */
+static uint64 compute_mask(uint64 n)
+{
+ n |= n >> 1;
+ n |= n >> 2;
+ n |= n >> 4;
+ n |= n >> 8;
+ n |= n >> 16;
+ n |= n >> 32;
+ return n;
+}
+
+/* length of n binary representation */
+static int nbits(uint64 n)
+{
+#ifdef HAVE__BUILTIN_CLZLL
+ return 64 - (n != 0 ? __builtin_clzll(n) : 64);
+#else /* use clever no branching bitwise operator version */
+ /* set all lower bits to 1 */
+ n = compute_mask(n);
+ /* then count them */
+ n -= (n >> 1) & UINT64CONST(0x5555555555555555);
+ n = (n & UINT64CONST(0x3333333333333333)) + ((n >> 2) & UINT64CONST(0x3333333333333333));
+ n = (n + (n >> 4)) & UINT64CONST(0x0F0F0F0F0F0F0F0F);
+ return (n * UINT64CONST(0x0101010101010101)) >> 56;
+#endif /* HAVE__BUILTIN_CLZLL */
+}
+
+/*
+ * Compute (x * y) % m, where x and y in [0, 2^64), m in [1, 2^64).
+ *
+ * Use improved interleaved modular multiplication algorithm to avoid
+ * overflows when necessary.
+ */
+static uint64 modular_multiply(uint64 x, uint64 y, const uint64 m)
+{
+ int i, bits;
+ uint64 r;
+
+ Assert(m >= 1);
+
+ /* Because of (x * y) % m = (x % m * y % m) % m */
+ if (x >= m)
+ x %= m;
+ if (y >= m)
+ y %= m;
+
+ /* Return the trivial result. */
+ if (x == 0 || y == 0 || m == 1)
+ return 0;
+
+ /* Return the result if (x * y) can be multiplied without overflow. */
+ if (nbits(x) + nbits(y) <= 64)
+ return (x * y) % m;
+
+ /* To reduce the for loop in the algorithm below, ensure y <= x. */
+ if (x < y)
+ {
+ uint64 tmp = x;
+ x = y;
+ y = tmp;
+ }
+
+ /* Interleaved modular multiplication algorithm from:
+ *
+ * D.N. Amanor et al., "Efficient hardware architecture for
+ * modular multiplication on FPGAs", in International Conference on
+ * Field Programmable Logic and Applications, Aug 2005, pp. 539-542.
+ *
+ * This algorithm is usually used in the field of digital circuit
+ * design.
+ *
+ * Input: X, Y, M; 0 <= X, Y <= M;
+ * Output: R = X * Y mod M;
+ * bits: number of bits of Y
+ * Y[i]: i th bit of Y
+ *
+ * 1. R = 0;
+ * 2. for (i = bits - 1; i >= 0; i--) {
+ * 3. R = 2 * R;
+ * 4. if (Y[i] == 0x1)
+ * 5. R += X;
+ * 6. if (R >= M) R -= M;
+ * 7. if (R >= M) R -= M;
+ * }
+ *
+ * In Steps 3 and 5, overflow should be avoided.
+ * Steps 6 and 7 can be instead a modular operation (R %= M).
+ */
+
+ bits = nbits(y);
+ r = 0;
+
+ for (i = bits - 1; i >= 0; i--)
+ {
+ if (r > UINT64CONST(0x7fffffffffffffff))
+ /* To avoid overflow, transform from (2 * r) to
+ * (2 * r) % m, and further transform to
+ * mathematically equivalent form shown below:
+ */
+ r = m - ((m - r) << 1);
+ else
+ r <<= 1;
+
+ if ((y >> i) & 0x1)
+ {
+ /* Compute (r + x) without overflow using same
+ * transformations described in the above comment.
+ */
+ if (m > UINT64CONST(0x7fffffffffffffff))
+ r = ((m - r) > x) ? r + x : r + x - m;
+ else
+ r = (r > m) ? r + x - m : r + x;
+ }
+
+ r %= m;
+ }
+
+ return r;
+}
+
+/*
+ * Donald Knuth's linear congruential generator
+ *
+ * Relying on multiplication overflows is part of the design
+ * of this simple pseudo random number generator.
+ */
+#define DK_LCG_MUL UINT64CONST(6364136223846793005)
+#define DK_LCG_INC UINT64CONST(1442695040888963407)
+
+/* do not use all small bits */
+#define LCG_SHIFT 13
+
+/*
+ * PRP: parametric pseudo-random permutation
+ *
+ * Result in [0, size) is a permutation for inputs in the same set.
+ *
+ * Note that this function does not pass statistical tests: eg
+ * permutations of 2, 3, 4 or 5 ints are not strictly equiprobable.
+ * Things worsen for large sizes as there are many more permutations
+ * (size!) than seeds to select them (2^64 < 21!).
+ * However it is inexpensive compared to an actual encryption function,
+ * and the quality is good enough to avoid trivial correlations on
+ * large sizes, which is the expected use case.
+ *
+ * THIS FUNCTION IS NOT CRYPTOGRAPHICALLY SECURE.
+ * DO NOT USE FOR SUCH PURPOSE.
+ */
+static int64
+pseudorandom_perm(const int64 data, const int64 isize, const int64 seed)
+{
+ /* computations are performed on unsigned values */
+ uint64 size = (uint64) isize;
+ uint64 v = (uint64) data % size;
+ uint64 key = (uint64) seed;
+ /* size-1: ensures 2 possibly overlapping halves */
+ uint64 mask = compute_mask(size-1) >> 1;
+
+ unsigned int i, p;
+
+ /* nothing to permute */
+ if (isize == 1)
+ return 0;
+
+ Assert(isize >= 2);
+
+ /* apply 4 rounds of bijective transformations:
+ * (1) scramble: partial xors on overlapping power-or-2 subsets
+ * (2) scatter: linear modulo
+ */
+ for (i = 0, p = key % PRP_PRIMES; i < PRP_ROUNDS; i++, p = (p + 1) % PRP_PRIMES)
+ {
+ uint64 t;
+
+ /* first "half" whitening, for v in 0 .. mask */
+ key = key * DK_LCG_MUL + DK_LCG_INC;
+ if (v <= mask)
+ v ^= (key >> LCG_SHIFT) & mask;
+
+ /* second (possibly overlapping) "half" whitening */
+ key = key * DK_LCG_MUL + DK_LCG_INC;
+ t = size - 1 - v;
+ if (t <= mask)
+ {
+ t ^= (key >> LCG_SHIFT) & mask;
+ v = size - 1 - t;
+ }
+
+ /* at most 2 primes are skipped for a given size */
+ while (unlikely(size % primes[p] == 0))
+ p = (p + 1) % PRP_PRIMES;
+
+ /* scatter values with a prime multiplication */
+ key = key * DK_LCG_MUL + DK_LCG_INC;
+
+ /* Performance shortcut for our 24 bit primes, ok for size up to ~10E12 */
+ if ((v & UINT64CONST(0xffffffffff)) == v)
+ v = (primes[p] * v + (key >> LCG_SHIFT)) % size;
+ else
+ v = (modular_multiply(primes[p], v, size) + (key >> LCG_SHIFT)) % size;
+ }
+
+ /* back to signed */
+ return (int64) v;
+}
+
/*
* Initialize the given SimpleStats struct to all zeroes
*/
@@ -2420,6 +2657,27 @@ evalStandardFunc(TState *thread, CState *st,
return true;
}
+ case PGBENCH_PRPERM:
+ {
+ int64 val, size, seed;
+
+ Assert(nargs == 3);
+
+ if (!coerceToInt(&vargs[0], &val) ||
+ !coerceToInt(&vargs[1], &size) ||
+ !coerceToInt(&vargs[2], &seed))
+ return false;
+
+ if (size < 1)
+ {
+ fprintf(stderr, "pr_perm size parameter must be >= 1\n");
+ return false;
+ }
+
+ setIntValue(retval, pseudorandom_perm(val, size, seed));
+ return true;
+ }
+
default:
/* cannot get here */
Assert(0);
diff --git a/src/bin/pgbench/pgbench.h b/src/bin/pgbench/pgbench.h
index de50340434..12d5c2946b 100644
--- a/src/bin/pgbench/pgbench.h
+++ b/src/bin/pgbench/pgbench.h
@@ -99,7 +99,8 @@ typedef enum PgBenchFunction
PGBENCH_IS,
PGBENCH_CASE,
PGBENCH_HASH_FNV1A,
- PGBENCH_HASH_MURMUR2
+ PGBENCH_HASH_MURMUR2,
+ PGBENCH_PRPERM
} PgBenchFunction;
typedef struct PgBenchExpr PgBenchExpr;
diff --git a/src/bin/pgbench/t/001_pgbench_with_server.pl b/src/bin/pgbench/t/001_pgbench_with_server.pl
index 0081989026..6ebbcb73cf 100644
--- a/src/bin/pgbench/t/001_pgbench_with_server.pl
+++ b/src/bin/pgbench/t/001_pgbench_with_server.pl
@@ -323,6 +323,13 @@ pgbench(
qr{command=98.: int 5432\b}, # :random_seed
qr{command=99.: int -9223372036854775808\b}, # min int
qr{command=100.: int 9223372036854775807\b}, # max int
+ qr{command=101.: boolean true\b},
+ qr{command=102.: boolean true\b},
+ qr{command=103.: boolean true\b},
+ qr{command=104.: boolean true\b},
+ qr{command=105.: boolean true\b},
+ qr{command=109.: boolean true\b},
+ qr{command=110.: boolean true\b},
],
'pgbench expressions',
{
@@ -450,6 +457,28 @@ SELECT :v0, :v1, :v2, :v3;
-- minint constant parsing
\set min debug(-9223372036854775808)
\set max debug(-(:min + 1))
+-- pseudo-random permutation
+\set t debug(pr_perm(0, 2) + pr_perm(1, 2) = 1)
+\set t debug(pr_perm(0, 3) + pr_perm(1, 3) + pr_perm(2, 3) = 3)
+\set t debug(pr_perm(0, 4) + pr_perm(1, 4) + pr_perm(2, 4) + pr_perm(3, 4) = 6)
+\set t debug(pr_perm(0, 5) + pr_perm(1, 5) + pr_perm(2, 5) + pr_perm(3, 5) + pr_perm(4, 5) = 10)
+\set t debug(pr_perm(0, 16) + pr_perm(1, 16) + pr_perm(2, 16) + pr_perm(3, 16) + \
+ pr_perm(4, 16) + pr_perm(5, 16) + pr_perm(6, 16) + pr_perm(7, 16) + \
+ pr_perm(8, 16) + pr_perm(9, 16) + pr_perm(10, 16) + pr_perm(11, 16) + \
+ pr_perm(12, 16) + pr_perm(13, 16) + pr_perm(14, 16) + pr_perm(15, 16) = 120)
+-- random sanity check
+\set size random(2, 1000)
+\set v random(0, :size - 1)
+\set p pr_perm(:v, :size)
+\set t debug(0 <= :p and :p < :size and :p = pr_perm(:v + :size, :size) and :p <> pr_perm(:v + 1, :size))
+-- actual values
+\set t debug(pr_perm(:v, 1) = 0)
+\set t debug(pr_perm(0, 2, 5432) = 0 and pr_perm(1, 2, 5432) = 1 and \
+ pr_perm(0, 2, 5431) = 1 and pr_perm(1, 2, 5431) = 0)
+-- ~50 bits test to exercise full modular multiplication
+\set t debug(pr_perm(0, 1000000000000000, 54321) = 9406454989259 and \
+ pr_perm(1999999999999999, 1000000000000000, 54321) = 54570773902028 and \
+ pr_perm(2500000000000000, 1000000000000000, 54321) = 771082311307468)
}
});
@@ -740,6 +769,10 @@ SELECT LEAST(:i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i);
[
'bad boolean', 2,
[qr{malformed variable.*trueXXX}], q{\set b :badtrue or true}
+ ],
+ [
+ 'invalid pr_perm size', 2,
+ [qr{pr_perm size parameter must be >= 1}], q{\set i pr_perm(0, 0)}
],);
diff --git a/src/bin/pgbench/t/002_pgbench_no_server.pl b/src/bin/pgbench/t/002_pgbench_no_server.pl
index 696c378edc..7c03ef2bbc 100644
--- a/src/bin/pgbench/t/002_pgbench_no_server.pl
+++ b/src/bin/pgbench/t/002_pgbench_no_server.pl
@@ -306,6 +306,16 @@ my @script_tests = (
'double overflow 3',
[qr{double constant overflow}],
{ 'overflow-3.sql' => "\\set d .1E310\n" }
+ ],
+ [
+ 'not enough arguments for pr_perm',
+ [qr{unexpected number of arguments \(pr_perm\)}],
+ { 'bad-pr_perm-1.sql' => "\\set i pr_perm(1)\n" }
+ ],
+ [
+ 'too many arguments for pr_perm',
+ [qr{unexpected number of arguments \(pr_perm\)}],
+ { 'bad-pr_perm-2.sql' => "\\set i pr_perm(1, 2, 3, 4)\n" }
],);
for my $t (@script_tests)
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in
index 9798bd24b4..4cf375085d 100644
--- a/src/include/pg_config.h.in
+++ b/src/include/pg_config.h.in
@@ -734,6 +734,9 @@
/* Define to 1 if your compiler understands __builtin_bswap64. */
#undef HAVE__BUILTIN_BSWAP64
+/* Define to 1 if your compiler understands __builtin_clzll. */
+#undef HAVE__BUILTIN_CLZLL
+
/* Define to 1 if your compiler understands __builtin_constant_p. */
#undef HAVE__BUILTIN_CONSTANT_P