Hi Fabien,

On 28.01.2018 11:10, Fabien COELHO wrote:

Hello Ildar,

I did everything you mentioned here and attached a new version of the
patch.

Patch applies, compiles, runs ok.

Alas, I still have a few more very minor comments about the doc, sorry
again:

No problem : )


  +       <entry> <literal>default_seed</literal> </entry>
  +       <entry>random seed used in hash functions by default</entry>

s/random //: the seed may or may not be random.

The "In some cases several distinct distributions..." paragraph is also
just one line in the xml source file. It should be justified at about 80
columns like others.


Fixed the doc, attached the patch. Thanks!

--
Ildar Musin
i.mu...@postgrespro.ru
diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml
index 3dd492c..b2a9c9d 100644
--- a/doc/src/sgml/ref/pgbench.sgml
+++ b/doc/src/sgml/ref/pgbench.sgml
@@ -874,13 +874,18 @@ pgbench <optional> <replaceable>options</replaceable> </optional> <replaceable>d
 
      <tbody>
       <row>
-       <entry> <literal>scale</literal> </entry>
-       <entry>current scale factor</entry>
+       <entry> <literal>client_id</literal> </entry>
+       <entry>unique number identifying the client session (starts from zero)</entry>
       </row>
 
       <row>
-       <entry> <literal>client_id</literal> </entry>
-       <entry>unique number identifying the client session (starts from zero)</entry>
+       <entry> <literal>default_seed</literal> </entry>
+       <entry>seed used in hash functions by default</entry>
+      </row>
+
+      <row>
+       <entry> <literal>scale</literal> </entry>
+       <entry>current scale factor</entry>
       </row>
      </tbody>
     </tgroup>
@@ -1246,6 +1251,27 @@ pgbench <optional> <replaceable>options</replaceable> </optional> <replaceable>d
        <entry><literal>5</literal></entry>
       </row>
       <row>
+       <entry><literal><function>hash(<replaceable>a</replaceable> [, <replaceable>seed</replaceable> ] )</function></literal></entry>
+       <entry>integer</entry>
+       <entry>alias for <literal>hash_murmur2()</literal></entry>
+       <entry><literal>hash(10, 5432)</literal></entry>
+       <entry><literal>-5817877081768721676</literal></entry>
+      </row>
+      <row>
+       <entry><literal><function>hash_fnv1a(<replaceable>a</replaceable> [, <replaceable>seed</replaceable> ] )</function></literal></entry>
+       <entry>integer</entry>
+       <entry><ulink url="https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function">FNV-1a hash</ulink></entry>
+       <entry><literal>hash_fnv1a(10, 5432)</literal></entry>
+       <entry><literal>-7793829335365542153</literal></entry>
+      </row>
+      <row>
+       <entry><literal><function>hash_murmur2(<replaceable>a</replaceable> [, <replaceable>seed</replaceable> ] )</function></literal></entry>
+       <entry>integer</entry>
+       <entry><ulink url="https://en.wikipedia.org/wiki/MurmurHash">MurmurHash2 hash</ulink></entry>
+       <entry><literal>hash_murmur2(10, 5432)</literal></entry>
+       <entry><literal>-5817877081768721676</literal></entry>
+      </row>
+      <row>
        <entry><literal><function>int(<replaceable>x</replaceable>)</function></literal></entry>
        <entry>integer</entry>
        <entry>cast to int</entry>
@@ -1423,6 +1449,31 @@ f(x) = PHI(2.0 * parameter * (x - mu) / (max - min + 1)) /
    </itemizedlist>
 
   <para>
+    Hash functions <literal>hash</literal>, <literal>hash_murmur2</literal> and
+    <literal>hash_fnv1a</literal> accept an input value and an optional seed parameter.
+    In case the seed isn't provided the value of <literal>:default_seed</literal>
+    is used, which is initialized randomly unless set by the command-line
+    <literal>-D</literal> option. Hash functions can be used to scatter the
+    distribution of random functions such as <literal>random_zipfian</literal> or
+    <literal>random_exponential</literal>. For instance, the following pgbench
+    script simulates a possible real-world workload typical of social media and
+    blogging platforms, where a few accounts generate excessive load:
+
+<programlisting>
+\set r random_zipfian(0, 100000000, 1.07)
+\set k abs(hash(:r)) % 1000000
+</programlisting>
+
+    In some cases several distinct distributions are needed which don't correlate
+    with each other, and this is when the optional seed parameter comes in handy:
+
+<programlisting>
+\set k1 abs(hash(:r, :default_seed + 123)) % 1000000
+\set k2 abs(hash(:r, :default_seed + 321)) % 1000000
+</programlisting>
+  </para>
+
+  <para>
    As an example, the full definition of the built-in TPC-B-like
    transaction is:
 
diff --git a/src/bin/pgbench/exprparse.y b/src/bin/pgbench/exprparse.y
index e23ca51..fc42c47 100644
--- a/src/bin/pgbench/exprparse.y
+++ b/src/bin/pgbench/exprparse.y
@@ -16,6 +16,10 @@
 
 #include "pgbench.h"
 
+#define PGBENCH_NARGS_VARIABLE	(-1)
+#define PGBENCH_NARGS_CASE		(-2)
+#define PGBENCH_NARGS_HASH		(-3)
+
 PgBenchExpr *expr_parse_result;
 
 static PgBenchExprList *make_elist(PgBenchExpr *exp, PgBenchExprList *list);
@@ -226,9 +230,13 @@ make_uop(yyscan_t yyscanner, const char *operator, PgBenchExpr *expr)
 /*
  * List of available functions:
  * - fname: function name, "!..." for special internal functions
- * - nargs: number of arguments
- *			-1 is a special value for least & greatest meaning #args >= 1
- *			-2 is for the "CASE WHEN ..." function, which has #args >= 3 and odd
+ * - nargs: number of arguments. Special cases:
+ *			- PGBENCH_NARGS_VARIABLE is a special value for least & greatest
+ *			  meaning #args >= 1;
+ *			- PGBENCH_NARGS_CASE is for the "CASE WHEN ..." function, which
+ *			  has #args >= 3 and odd;
+ * 			- PGBENCH_NARGS_HASH is for hash functions, which have one required
+ *			  and one optional argument;
  * - tag: function identifier from PgBenchFunction enum
  */
 static const struct
@@ -259,10 +267,10 @@ static const struct
 		"abs", 1, PGBENCH_ABS
 	},
 	{
-		"least", -1, PGBENCH_LEAST
+		"least", PGBENCH_NARGS_VARIABLE, PGBENCH_LEAST
 	},
 	{
-		"greatest", -1, PGBENCH_GREATEST
+		"greatest", PGBENCH_NARGS_VARIABLE, PGBENCH_GREATEST
 	},
 	{
 		"debug", 1, PGBENCH_DEBUG
@@ -347,7 +355,25 @@ static const struct
 	},
 	/* "case when ... then ... else ... end" construction */
 	{
-		"!case_end", -2, PGBENCH_CASE
+		"!case_end", PGBENCH_NARGS_CASE, PGBENCH_CASE
+	},
+	{
+		"hash", PGBENCH_NARGS_HASH, PGBENCH_HASH_MURMUR2
+	},
+	{
+		"hash_murmur2", PGBENCH_NARGS_HASH, PGBENCH_HASH_MURMUR2
+	},
+	{
+		"hash_fnv1a", PGBENCH_NARGS_HASH, PGBENCH_HASH_FNV1A
+	},
+	{
+		"hash", -3, PGBENCH_HASH_MURMUR2
+	},
+	{
+		"hash_murmur2", -3, PGBENCH_HASH_MURMUR2
+	},
+	{
+		"hash_fnv1a", -3, PGBENCH_HASH_FNV1A
 	},
 	/* keep as last array element */
 	{
@@ -423,29 +449,51 @@ elist_length(PgBenchExprList *list)
 static PgBenchExpr *
 make_func(yyscan_t yyscanner, int fnumber, PgBenchExprList *args)
 {
+	int len = elist_length(args);
+
 	PgBenchExpr *expr = pg_malloc(sizeof(PgBenchExpr));
 
 	Assert(fnumber >= 0);
 
-	if (PGBENCH_FUNCTIONS[fnumber].nargs >= 0 &&
-		PGBENCH_FUNCTIONS[fnumber].nargs != elist_length(args))
-		expr_yyerror_more(yyscanner, "unexpected number of arguments",
-						  PGBENCH_FUNCTIONS[fnumber].fname);
-
-	/* check at least one arg for least & greatest */
-	if (PGBENCH_FUNCTIONS[fnumber].nargs == -1 &&
-		elist_length(args) == 0)
-		expr_yyerror_more(yyscanner, "at least one argument expected",
-						  PGBENCH_FUNCTIONS[fnumber].fname);
-	/* special case: case (when ... then ...)+ (else ...)? end */
-	if (PGBENCH_FUNCTIONS[fnumber].nargs == -2)
-	{
-		int len = elist_length(args);
-
-		/* 'else' branch is always present, but could be a NULL-constant */
-		if (len < 3 || len % 2 != 1)
-			expr_yyerror_more(yyscanner, "odd and >= 3 number of arguments expected",
-							  "case control structure");
+	/* validate arguments number including few special cases */
+	switch (PGBENCH_FUNCTIONS[fnumber].nargs)
+	{
+		/* check at least one arg for least & greatest */
+		case PGBENCH_NARGS_VARIABLE:
+			if (len == 0)
+				expr_yyerror_more(yyscanner, "at least one argument expected",
+								  PGBENCH_FUNCTIONS[fnumber].fname);
+			break;
+
+		/* case (when ... then ...)+ (else ...)? end */
+		case PGBENCH_NARGS_CASE:
+			/* 'else' branch is always present, but could be a NULL-constant */
+			if (len < 3 || len % 2 != 1)
+				expr_yyerror_more(yyscanner,
+								  "odd and >= 3 number of arguments expected",
+								  "case control structure");
+			break;
+
+		/* hash functions with optional seed argument */
+		case PGBENCH_NARGS_HASH:
+			if (len > 2)
+				expr_yyerror_more(yyscanner, "unexpected number of arguments",
+								  PGBENCH_FUNCTIONS[fnumber].fname);
+
+			if (len == 1)
+			{
+				PgBenchExpr *var = make_variable("default_seed");
+				args = make_elist(var, args);
+			}
+			break;
+
+		/* common case: positive arguments number */
+		default:
+			Assert(PGBENCH_FUNCTIONS[fnumber].nargs >= 0);
+
+			if (PGBENCH_FUNCTIONS[fnumber].nargs != len)
+				expr_yyerror_more(yyscanner, "unexpected number of arguments",
+								  PGBENCH_FUNCTIONS[fnumber].fname);
 	}
 
 	expr->etype = ENODE_FUNCTION;
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index 31ea6ca..ac69d22 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -61,6 +61,14 @@
 #define ERRCODE_UNDEFINED_TABLE  "42P01"
 
 /*
+ * Hashing constants
+ */
+#define FNV_PRIME 0x100000001b3
+#define FNV_OFFSET_BASIS 0xcbf29ce484222325
+#define MM2_MUL 0xc6a4a7935bd1e995
+#define MM2_ROT 47
+
+/*
  * Multi-platform pthread implementations
  */
 
@@ -915,6 +923,51 @@ getZipfianRand(TState *thread, int64 min, int64 max, double s)
 }
 
 /*
+ * FNV-1a hash function
+ */
+static int64
+getHashFnv1a(int64 val, uint64 seed)
+{
+	int64	result;
+	int		i;
+
+	result = FNV_OFFSET_BASIS ^ seed;
+	for (i = 0; i < 8; ++i)
+	{
+		int32 octet = val & 0xff;
+
+		val = val >> 8;
+		result = result ^ octet;
+		result = result * FNV_PRIME;
+	}
+
+	return result;
+}
+
+/*
+ * Murmur2 hash function
+ */
+static int64
+getHashMurmur2(int64 val, uint64 seed)
+{
+	uint64	result = seed ^ (sizeof(int64) * MM2_MUL);
+	uint64	k = (uint64) val;
+
+	k *= MM2_MUL;
+	k ^= k >> MM2_ROT;
+	k *= MM2_MUL;
+
+	result ^= k;
+	result *= MM2_MUL;
+
+	result ^= result >> MM2_ROT;
+	result *= MM2_MUL;
+	result ^= result >> MM2_ROT;
+
+	return (int64) result;
+}
+
+/*
  * Initialize the given SimpleStats struct to all zeroes
  */
 static void
@@ -2209,6 +2262,30 @@ evalStandardFunc(
 				return true;
 			}
 
+			/* hashing */
+		case PGBENCH_HASH_FNV1A:
+		case PGBENCH_HASH_MURMUR2:
+			{
+				int64	val,
+						seed;
+
+				Assert(nargs == 2);
+
+				if (!coerceToInt(&vargs[0], &val) ||
+					!coerceToInt(&vargs[1], &seed))
+					return false;
+
+				if (func == PGBENCH_HASH_MURMUR2)
+					setIntValue(retval, getHashMurmur2(val, seed));
+				else if (func == PGBENCH_HASH_FNV1A)
+					setIntValue(retval, getHashFnv1a(val, seed));
+				else
+					/* cannot get here */
+					Assert(0);
+
+				return true;
+			}
+
 		default:
 			/* cannot get here */
 			Assert(0);
@@ -4972,6 +5049,10 @@ main(int argc, char **argv)
 		exit(1);
 	}
 
+	/* set random seed */
+	INSTR_TIME_SET_CURRENT(start_time);
+	srandom((unsigned int) INSTR_TIME_GET_MICROSEC(start_time));
+
 	if (internal_script_used)
 	{
 		/*
@@ -5033,6 +5114,19 @@ main(int argc, char **argv)
 		}
 	}
 
+	/* set default seed for hash functions */
+	if (lookupVariable(&state[0], "default_seed") == NULL)
+	{
+		uint64	seed = ((uint64) (random() & 0xFFFF) << 48) |
+					   ((uint64) (random() & 0xFFFF) << 32) |
+					   ((uint64) (random() & 0xFFFF) << 16) |
+					   (uint64) (random() & 0xFFFF);
+
+		for (i = 0; i < nclients; i++)
+			if (!putVariableInt(&state[i], "startup", "default_seed", (int64) seed))
+				exit(1);
+	}
+
 	if (!is_no_vacuum)
 	{
 		fprintf(stderr, "starting vacuum...");
@@ -5050,10 +5144,6 @@ main(int argc, char **argv)
 	}
 	PQfinish(con);
 
-	/* set random seed */
-	INSTR_TIME_SET_CURRENT(start_time);
-	srandom((unsigned int) INSTR_TIME_GET_MICROSEC(start_time));
-
 	/* set up thread data structures */
 	threads = (TState *) pg_malloc(sizeof(TState) * nthreads);
 	nclients_dealt = 0;
diff --git a/src/bin/pgbench/pgbench.h b/src/bin/pgbench/pgbench.h
index 0705ccd..6983865 100644
--- a/src/bin/pgbench/pgbench.h
+++ b/src/bin/pgbench/pgbench.h
@@ -97,7 +97,9 @@ typedef enum PgBenchFunction
 	PGBENCH_LE,
 	PGBENCH_LT,
 	PGBENCH_IS,
-	PGBENCH_CASE
+	PGBENCH_CASE,
+	PGBENCH_HASH_FNV1A,
+	PGBENCH_HASH_MURMUR2
 } PgBenchFunction;
 
 typedef struct PgBenchExpr PgBenchExpr;
diff --git a/src/bin/pgbench/t/001_pgbench_with_server.pl b/src/bin/pgbench/t/001_pgbench_with_server.pl
index 99286f6..97945a8 100644
--- a/src/bin/pgbench/t/001_pgbench_with_server.pl
+++ b/src/bin/pgbench/t/001_pgbench_with_server.pl
@@ -259,6 +259,11 @@ pgbench(
 		qr{command=46.: int 46\b},
 		qr{command=47.: boolean true\b},
 		qr{command=48.: boolean true\b},
+		qr{command=49.: int -5817877081768721676\b},
+		qr{command=50.: boolean true\b},
+		qr{command=51.: int -7793829335365542153\b},
+		qr{command=52.: int -?\d+\b},
+		qr{command=53.: boolean true\b},
 	],
 	'pgbench expressions',
 	{   '001_pgbench_expressions' => q{-- integer functions
@@ -327,6 +332,12 @@ pgbench(
 \set n6 debug(:n IS NULL AND NOT :f AND :t)
 -- conditional truth
 \set cs debug(CASE WHEN 1 THEN TRUE END AND CASE WHEN 1.0 THEN TRUE END AND CASE WHEN :n THEN NULL ELSE TRUE END)
+-- hash functions
+\set h0 debug(hash(10, 5432))
+\set h1 debug(:h0 = hash_murmur2(10, 5432))
+\set h3 debug(hash_fnv1a(10, 5432))
+\set h4 debug(hash(10))
+\set h5 debug(hash(10) = hash(10, :default_seed))
 -- lazy evaluation
 \set zy 0
 \set yz debug(case when :zy = 0 then -1 else (1 / :zy) end)

Reply via email to