Hello Fabien,

11/01/2018 19:21, Ildar Musin пишет:
>
> 10/01/2018 21:42, Fabien COELHO пишет:
>> Hmm. I do not think that we should want a shared seed value. The seed
>> should be different for each call so as to avoid undesired
>> correlations. If wanted, correlation could be obtained by using an
>> explicit identical seed.
>>
>> ISTM that the best way to add the seed is to call random() when the
>> second arg is missing in make_func. Also, this means that the executor
>> would always get its two arguments, so it would simplify the code there.
>>
> Ok, I think I understand what you meant. You meant the case like following:
>
> \set x random(1, 100)
> \set h1 hash(:x)
> \set h2 hash(:x)  -- will have different seed from h1
>
> so that different instances of hash function within one script would
> have different seeds. Yes, that is a good idea, I can do that.
>
I have added this feature in the attached patch. But on second thought,
this could be something that users won't expect. For example, they may
want to run pgbench with two scripts:
- the first one updates a row by a key that is a hashed random_zipfian value;
- the second one reads a row by a key generated the same way
(that is actually what YCSB workloads A and B do).

It feels natural to write something like this:
\set rnd random_zipfian(0, 1000000, 0.99)
\set key abs(hash(:rnd)) % 1000
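in both scripts and expect that both would produce the same key
distribution. But they wouldn't: with this patch, every hash() call that
omits the seed gets its own random seed at parse time, so the same :rnd
value maps to different keys in each script. We could of course describe
this implicit behaviour in the documentation, but ISTM that a shared seed
would be clearer.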

Thanks!

-- 
Ildar Musin
Postgres Professional: http://www.postgrespro.com
Russian Postgres Company 

diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml
index 3dd492c..c575f19 100644
--- a/doc/src/sgml/ref/pgbench.sgml
+++ b/doc/src/sgml/ref/pgbench.sgml
@@ -1246,6 +1246,27 @@ pgbench <optional> <replaceable>options</replaceable> 
</optional> <replaceable>d
        <entry><literal>5</literal></entry>
       </row>
       <row>
+       <entry><literal><function>hash(<replaceable>a</replaceable> [, 
<replaceable>seed</replaceable> ] )</function></literal></entry>
+       <entry>integer</entry>
+       <entry>alias for <literal>hash_murmur2()</literal></entry>
+       <entry><literal>hash(10, 5432)</literal></entry>
+       <entry><literal>-5817877081768721676</literal></entry>
+      </row>
+      <row>
+       <entry><literal><function>hash_fnv1a(<replaceable>a</replaceable> [, 
<replaceable>seed</replaceable> ] )</function></literal></entry>
+       <entry>integer</entry>
+       <entry><literal>FNV</literal> hash</entry>
+       <entry><literal>hash_fnv1a(10, 5432)</literal></entry>
+       <entry><literal>-7793829335365542153</literal></entry>
+      </row>
+      <row>
+       <entry><literal><function>hash_murmur2(<replaceable>a</replaceable> [, 
<replaceable>seed</replaceable> ] )</function></literal></entry>
+       <entry>integer</entry>
+       <entry><literal>murmur2</literal> hash</entry>
+       <entry><literal>hash_murmur2(10, 5432)</literal></entry>
+       <entry><literal>-5817877081768721676</literal></entry>
+      </row>
+      <row>
        
<entry><literal><function>int(<replaceable>x</replaceable>)</function></literal></entry>
        <entry>integer</entry>
        <entry>cast to int</entry>
diff --git a/src/bin/pgbench/exprparse.y b/src/bin/pgbench/exprparse.y
index e23ca51..36cad30 100644
--- a/src/bin/pgbench/exprparse.y
+++ b/src/bin/pgbench/exprparse.y
@@ -16,6 +16,10 @@
 
 #include "pgbench.h"
 
+#define PGBENCH_NARGS_VARIABLE (-1)
+#define PGBENCH_NARGS_CASE             (-2)
+#define PGBENCH_NARGS_HASH             (-3)
+
 PgBenchExpr *expr_parse_result;
 
 static PgBenchExprList *make_elist(PgBenchExpr *exp, PgBenchExprList *list);
@@ -226,9 +230,13 @@ make_uop(yyscan_t yyscanner, const char *operator, 
PgBenchExpr *expr)
 /*
  * List of available functions:
  * - fname: function name, "!..." for special internal functions
- * - nargs: number of arguments
- *                     -1 is a special value for least & greatest meaning 
#args >= 1
- *                     -2 is for the "CASE WHEN ..." function, which has #args 
>= 3 and odd
+ * - nargs: number of arguments. Special cases:
+ *                     - PGBENCH_NARGS_VARIABLE is a special value for least & 
greatest
+ *                       meaning #args >= 1;
+ *                     - PGBENCH_NARGS_CASE is for the "CASE WHEN ..." 
function, which
+ *                       has #args >= 3 and odd;
+ *                     - PGBENCH_NARGS_HASH is for hash functions, which have 
one required
+ *                       and one optional argument;
  * - tag: function identifier from PgBenchFunction enum
  */
 static const struct
@@ -259,10 +267,10 @@ static const struct
                "abs", 1, PGBENCH_ABS
        },
        {
-               "least", -1, PGBENCH_LEAST
+               "least", PGBENCH_NARGS_VARIABLE, PGBENCH_LEAST
        },
        {
-               "greatest", -1, PGBENCH_GREATEST
+               "greatest", PGBENCH_NARGS_VARIABLE, PGBENCH_GREATEST
        },
        {
                "debug", 1, PGBENCH_DEBUG
@@ -347,7 +355,16 @@ static const struct
        },
        /* "case when ... then ... else ... end" construction */
        {
-               "!case_end", -2, PGBENCH_CASE
+               "!case_end", PGBENCH_NARGS_CASE, PGBENCH_CASE
+       },
+       {
+               "hash", PGBENCH_NARGS_HASH, PGBENCH_HASH_MURMUR2
+       },
+       {
+               "hash_murmur2", PGBENCH_NARGS_HASH, PGBENCH_HASH_MURMUR2
+       },
+       {
+               "hash_fnv1a", PGBENCH_NARGS_HASH, PGBENCH_HASH_FNV1A
        },
        /* keep as last array element */
        {
@@ -423,29 +440,47 @@ elist_length(PgBenchExprList *list)
 static PgBenchExpr *
 make_func(yyscan_t yyscanner, int fnumber, PgBenchExprList *args)
 {
+       int len = elist_length(args);
+
        PgBenchExpr *expr = pg_malloc(sizeof(PgBenchExpr));
 
        Assert(fnumber >= 0);
 
-       if (PGBENCH_FUNCTIONS[fnumber].nargs >= 0 &&
-               PGBENCH_FUNCTIONS[fnumber].nargs != elist_length(args))
-               expr_yyerror_more(yyscanner, "unexpected number of arguments",
-                                                 
PGBENCH_FUNCTIONS[fnumber].fname);
-
-       /* check at least one arg for least & greatest */
-       if (PGBENCH_FUNCTIONS[fnumber].nargs == -1 &&
-               elist_length(args) == 0)
-               expr_yyerror_more(yyscanner, "at least one argument expected",
-                                                 
PGBENCH_FUNCTIONS[fnumber].fname);
-       /* special case: case (when ... then ...)+ (else ...)? end */
-       if (PGBENCH_FUNCTIONS[fnumber].nargs == -2)
-       {
-               int len = elist_length(args);
-
-               /* 'else' branch is always present, but could be a 
NULL-constant */
-               if (len < 3 || len % 2 != 1)
-                       expr_yyerror_more(yyscanner, "odd and >= 3 number of 
arguments expected",
-                                                         "case control 
structure");
+       /* validate arguments number including few special cases */
+       switch (PGBENCH_FUNCTIONS[fnumber].nargs)
+       {
+               /* check at least one arg for least & greatest */
+               case PGBENCH_NARGS_VARIABLE:
+                       if (len == 0)
+                               expr_yyerror_more(yyscanner, "at least one 
argument expected",
+                                                                 
PGBENCH_FUNCTIONS[fnumber].fname);
+                       break;
+
+               /* case (when ... then ...)+ (else ...)? end */
+               case PGBENCH_NARGS_CASE:
+                       /* 'else' branch is always present, but could be a 
NULL-constant */
+                       if (len < 3 || len % 2 != 1)
+                               expr_yyerror_more(yyscanner,
+                                                                 "odd and >= 3 
number of arguments expected",
+                                                                 "case control 
structure");
+                       break;
+
+               /* hash functions with optional seed argument */
+               case PGBENCH_NARGS_HASH:
+                       if (len < 1 || len > 2)
+                       expr_yyerror_more(yyscanner, "unexpected number of 
arguments",
+                                                         
PGBENCH_FUNCTIONS[fnumber].fname);
+
+                       /* if seed argument is missing add random one */
+                       if (len == 1)
+                               args = 
make_elist(make_integer_constant(random()), args);
+                       break;
+
+               /* common case: positive arguments number */
+               default:
+                       if (PGBENCH_FUNCTIONS[fnumber].nargs != len)
+                               expr_yyerror_more(yyscanner, "unexpected number 
of arguments",
+                                                                 
PGBENCH_FUNCTIONS[fnumber].fname);
        }
 
        expr->etype = ENODE_FUNCTION;
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index 31ea6ca..396525e 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -61,6 +61,14 @@
 #define ERRCODE_UNDEFINED_TABLE  "42P01"
 
 /*
+ * Hashing constants
+ */
+#define FNV_PRIME 0x100000001b3
+#define FNV_OFFSET_BASIS 0xcbf29ce484222325
+#define MM2_MUL 0xc6a4a7935bd1e995
+#define MM2_ROT 47
+
+/*
  * Multi-platform pthread implementations
  */
 
@@ -915,6 +923,51 @@ getZipfianRand(TState *thread, int64 min, int64 max, 
double s)
 }
 
 /*
+ * FNV-1a hash function
+ */
+static int64
+getHashFnv1a(int64 val, uint64 seed)
+{
+       int64   result;
+       int             i;
+
+       result = FNV_OFFSET_BASIS ^ seed;
+       for (i = 0; i < 8; ++i)
+       {
+               int32 octet = val & 0xff;
+
+               val = val >> 8;
+               result = result ^ octet;
+               result = result * FNV_PRIME;
+       }
+
+       return result;
+}
+
+/*
+ * Murmur2 hash function
+ */
+static int64
+getHashMurmur2(int64 val, uint64 seed)
+{
+       uint64  result = seed ^ (sizeof(int64) * MM2_MUL);
+       uint64  k = (uint64) val;
+
+       k *= MM2_MUL;
+       k ^= k >> MM2_ROT;
+       k *= MM2_MUL;
+
+       result ^= k;
+       result *= MM2_MUL;
+
+       result ^= result >> MM2_ROT;
+       result *= MM2_MUL;
+       result ^= result >> MM2_ROT;
+
+       return (int64) result;
+}
+
+/*
  * Initialize the given SimpleStats struct to all zeroes
  */
 static void
@@ -2209,6 +2262,28 @@ evalStandardFunc(
                                return true;
                        }
 
+                       /* hashing */
+               case PGBENCH_HASH_FNV1A:
+               case PGBENCH_HASH_MURMUR2:
+                       {
+                               int64   val;
+                               int64   seed;
+                               int64   result;
+
+                               Assert(nargs == 2);
+
+                               if (!coerceToInt(&vargs[0], &val))
+                                       return false;
+
+                               if (!coerceToInt(&vargs[1], &seed))
+                                               return false;
+
+                               result = (func == PGBENCH_HASH_FNV1A) ?
+                                       getHashFnv1a(val, seed) : 
getHashMurmur2(val, seed);
+                               setIntValue(retval, result);
+                               return true;
+                       }
+
                default:
                        /* cannot get here */
                        Assert(0);
diff --git a/src/bin/pgbench/pgbench.h b/src/bin/pgbench/pgbench.h
index 0705ccd..6983865 100644
--- a/src/bin/pgbench/pgbench.h
+++ b/src/bin/pgbench/pgbench.h
@@ -97,7 +97,9 @@ typedef enum PgBenchFunction
        PGBENCH_LE,
        PGBENCH_LT,
        PGBENCH_IS,
-       PGBENCH_CASE
+       PGBENCH_CASE,
+       PGBENCH_HASH_FNV1A,
+       PGBENCH_HASH_MURMUR2
 } PgBenchFunction;
 
 typedef struct PgBenchExpr PgBenchExpr;
diff --git a/src/bin/pgbench/t/001_pgbench_with_server.pl 
b/src/bin/pgbench/t/001_pgbench_with_server.pl
index a8b2962..eda28ea 100644
--- a/src/bin/pgbench/t/001_pgbench_with_server.pl
+++ b/src/bin/pgbench/t/001_pgbench_with_server.pl
@@ -259,6 +259,10 @@ pgbench(
                qr{command=46.: int 46\b},
                qr{command=47.: boolean true\b},
                qr{command=48.: boolean true\b},
+               qr{command=49.: int -5817877081768721676\b},
+               qr{command=50.: boolean true\b},
+               qr{command=51.: int -7793829335365542153\b},
+               qr{command=52.: int -?\d+\b},
        ],
        'pgbench expressions',
        {   '001_pgbench_expressions' => q{-- integer functions
@@ -327,6 +331,11 @@ pgbench(
 \set n6 debug(:n IS NULL AND NOT :f AND :t)
 -- conditional truth
 \set cs debug(CASE WHEN 1 THEN TRUE END AND CASE WHEN 1.0 THEN TRUE END AND 
CASE WHEN :n THEN NULL ELSE TRUE END)
+-- hash functions
+\set h0 debug(hash(10, 5432))
+\set h1 debug(:h0 = hash_murmur2(10, 5432))
+\set h3 debug(hash_fnv1a(10, 5432))
+\set h4 debug(hash(10))
 -- lazy evaluation
 \set zy 0
 \set yz debug(case when :zy = 0 then -1 else (1 / :zy) end)

Reply via email to