For whatever it is worth, the patch looks good to me.

A minor nitpick would be to use a verb in the part:

`cost when the parameter in (0, 1)`

maybe:

`cost when the parameter's value is in (0, 1)` or similar.

Looks ok.

Apart from that, I would suggest it that the patch could be moved to
Waiting for Author state.

Attached an upated.

--
Fabien.
diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml
index 24833f46bc..2ded558163 100644
--- a/doc/src/sgml/ref/pgbench.sgml
+++ b/doc/src/sgml/ref/pgbench.sgml
@@ -1607,7 +1607,8 @@ f(x) = PHI(2.0 * parameter * (x - mu) / (max - min + 1)) /
       "Non-Uniform Random Variate Generation", Luc Devroye, p. 550-551,
       Springer 1986. The distribution is not defined when the parameter's
       value is 1.0. The function's performance is poor for parameter values
-      close and above 1.0 and on a small range.
+      very close and above 1.0 and on a small range. Also, there is a large setup
+      cost when the parameter's value is in (0, 1) and the interval is large.
      </para>
      <para>
       <replaceable>parameter</replaceable> defines how skewed the distribution
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index 4789ab92ee..fa35c27736 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -410,7 +410,7 @@ typedef struct
 } CState;
 
 /*
- * Cache cell for random_zipfian call
+ * Cache cell for random_zipfian call for when s in (0.0, 1.0)
  */
 typedef struct
 {
@@ -418,7 +418,7 @@ typedef struct
 	double		s;				/* s - parameter of random_zipfian function */
 	int64		n;				/* number of elements in range (max - min + 1) */
 
-	double		harmonicn;		/* generalizedHarmonicNumber(n, s) */
+	double		harmonicn;		/* cache expensive generalizedHarmonicNumber(n, s) */
 	double		alpha;
 	double		beta;
 	double		eta;
@@ -982,15 +982,17 @@ getPoissonRand(RandomState *random_state, double center)
 	return (int64) (-log(uniform) * center + 0.5);
 }
 
-/* helper function for getZipfianRand */
+/* expensive helper function for getZipfianRand when s in (0.0, 1.0) */
 static double
 generalizedHarmonicNumber(int64 n, double s)
 {
-	int			i;
+	int64			i;
 	double		ans = 0.0;
 
+	/* computed in reverse order for better numerical precision */
 	for (i = n; i > 1; i--)
 		ans += pow(i, -s);
+
 	return ans + 1.0;
 }
 
@@ -1053,6 +1055,12 @@ zipfFindOrCreateCacheCell(ZipfCache *cache, int64 n, double s)
  * Computing zipfian using rejection method, based on
  * "Non-Uniform Random Variate Generation",
  * Luc Devroye, p. 550-551, Springer 1986.
+ *
+ * This method applies when s > 1.0 and works with an unbounded n,
+ * although the implementation is bounded.
+ *
+ * It is inefficient when s is close to 1.0 or n is small because the
+ * while loop is more likely to be iterated.
  */
 static int64
 computeIterativeZipfian(RandomState *random_state, int64 n, double s)
@@ -1083,6 +1091,12 @@ computeIterativeZipfian(RandomState *random_state, int64 n, double s)
  * Computing zipfian using harmonic numbers, based on algorithm described in
  * "Quickly Generating Billion-Record Synthetic Databases",
  * Jim Gray et al, SIGMOD 1994
+ *
+ * This method applies when s in (0.0, 1.0). It does zipf for the first
+ * two values, and then uses an exponential approximation starting from
+ * the third value.
+ *
+ * There is a potentially very large setup cost when n is large.
  */
 static int64
 computeHarmonicZipfian(ZipfCache *zipf_cache, RandomState *random_state,
@@ -1109,7 +1123,7 @@ getZipfianRand(ZipfCache *zipf_cache, RandomState *random_state, int64 min,
 	/* abort if parameter is invalid */
 	Assert(s > 0.0 && s != 1.0 && s <= MAX_ZIPFIAN_PARAM);
 
-	return min - 1 + ((s > 1)
+	return min - 1 + ((s > 1.0)
 					  ? computeIterativeZipfian(random_state, n, s)
 					  : computeHarmonicZipfian(zipf_cache, random_state, n, s));
 }

Reply via email to