From ca3593282e96184615f7a004d52e63dd98b79469 Mon Sep 17 00:00:00 2001
From: Michael Paquier <michael@otacoo.com>
Date: Wed, 16 Dec 2015 14:53:56 +0900
Subject: [PATCH] Make pgbench documentation more precise for function
 parameters

This commit generalizes the way function parameters are named in the
function handling of pgbench by replacing the term "threshold" with the
more general term "parameter", and improves the description of the
Gaussian function to be more general.

Author: Fabien Coelho
---
 doc/src/sgml/ref/pgbench.sgml | 67 ++++++++++++++++++++++------------------
 src/bin/pgbench/pgbench.c     | 71 ++++++++++++++++++++++++-------------------
 2 files changed, 78 insertions(+), 60 deletions(-)

diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml
index 0ac40f1..541d17b 100644
--- a/doc/src/sgml/ref/pgbench.sgml
+++ b/doc/src/sgml/ref/pgbench.sgml
@@ -788,7 +788,7 @@ pgbench <optional> <replaceable>options</> </optional> <replaceable>dbname</>
 
    <varlistentry>
     <term>
-     <literal>\setrandom <replaceable>varname</> <replaceable>min</> <replaceable>max</> [ uniform | { gaussian | exponential } <replaceable>threshold</> ]</literal>
+     <literal>\setrandom <replaceable>varname</> <replaceable>min</> <replaceable>max</> [ uniform | { gaussian | exponential } <replaceable>parameter</> ]</literal>
      </term>
 
     <listitem>
@@ -804,54 +804,63 @@ pgbench <optional> <replaceable>options</> </optional> <replaceable>dbname</>
       By default, or when <literal>uniform</> is specified, all values in the
       range are drawn with equal probability.  Specifying <literal>gaussian</>
       or  <literal>exponential</> options modifies this behavior; each
-      requires a mandatory threshold which determines the precise shape of the
+      requires a mandatory parameter which determines the precise shape of the
       distribution.
      </para>
 
      <para>
       For a Gaussian distribution, the interval is mapped onto a standard
       normal distribution (the classical bell-shaped Gaussian curve) truncated
-      at <literal>-threshold</> on the left and <literal>+threshold</>
+      at <literal>-parameter</> on the left and <literal>+parameter</>
       on the right.
+      Values in the middle of the interval are more likely to be drawn.
       To be precise, if <literal>PHI(x)</> is the cumulative distribution
       function of the standard normal distribution, with mean <literal>mu</>
-      defined as <literal>(max + min) / 2.0</>, then value <replaceable>i</>
-      between <replaceable>min</> and <replaceable>max</> inclusive is drawn
-      with probability:
-      <literal>
-        (PHI(2.0 * threshold * (i - min - mu + 0.5) / (max - min + 1)) -
-         PHI(2.0 * threshold * (i - min - mu - 0.5) / (max - min + 1))) /
-         (2.0 * PHI(threshold) - 1.0)</>.
-      Intuitively, the larger the <replaceable>threshold</>, the more
+      defined as <literal>(max + min) / 2.0</>, with
+<literallayout>
+ f(x) = PHI(2.0 * parameter * (x - mu) / (max - min + 1)) /
+        (2.0 * PHI(parameter) - 1.0)
+</literallayout>
+      then value <replaceable>i</> between <replaceable>min</> and
+      <replaceable>max</> inclusive is drawn with probability:
+      <literal>f(i + 0.5) - f(i - 0.5)</>.
+      Intuitively, the larger <replaceable>parameter</>, the more
       frequently values close to the middle of the interval are drawn, and the
       less frequently values close to the <replaceable>min</> and
-      <replaceable>max</> bounds.
-      About 67% of values are drawn from the middle <literal>1.0 / threshold</>
-      and 95% in the middle <literal>2.0 / threshold</>; for instance, if
-      <replaceable>threshold</> is 4.0, 67% of values are drawn from the middle
-      quarter and 95% from the middle half of the interval.
-      The minimum <replaceable>threshold</> is 2.0 for performance of
-      the Box-Muller transform.
+      <replaceable>max</> bounds. About 67% of values are drawn from the
+      middle <literal>1.0 / parameter</>, that is a relative
+      <literal>0.5 / parameter</> around the mean, and 95% in the middle
+      <literal>2.0 / parameter</>, that is a relative
+      <literal>1.0 / parameter</> around the mean; for instance, if
+      <replaceable>parameter</> is 4.0, 67% of values are drawn from the
+      middle quarter (1.0 / 4.0) of the interval (i.e. from
+      <literal>3.0 / 8.0</> to <literal>5.0 / 8.0</>) and 95% from
+      the middle half (<literal>2.0 / 4.0</>) of the interval (second and
+      third quartiles). The minimum <replaceable>parameter</> is 2.0 for
+      performance of the Box-Muller transform.
      </para>
 
      <para>
-      For an exponential distribution, the <replaceable>threshold</>
-      parameter controls the distribution by truncating a quickly-decreasing
-      exponential distribution at <replaceable>threshold</>, and then
+      For an exponential distribution, <replaceable>parameter</>
+      controls the distribution by truncating a quickly-decreasing
+      exponential distribution at <replaceable>parameter</>, and then
       projecting onto integers between the bounds.
-      To be precise, value <replaceable>i</> between <replaceable>min</> and
+      To be precise, with
+<literallayout>
+f(x) = exp(-parameter * (x - min) / (max - min + 1)) / (1.0 - exp(-parameter))
+</literallayout>
+      then value <replaceable>i</> between <replaceable>min</> and
       <replaceable>max</> inclusive is drawn with probability:
-      <literal>(exp(-threshold*(i-min)/(max+1-min)) -
-       exp(-threshold*(i+1-min)/(max+1-min))) / (1.0 - exp(-threshold))</>.
-      Intuitively, the larger the <replaceable>threshold</>, the more
+      <literal>f(i) - f(i + 1)</>.
+      Intuitively, the larger <replaceable>parameter</>, the more
       frequently values close to <replaceable>min</> are accessed, and the
       less frequently values close to <replaceable>max</> are accessed.
-      The closer to 0 the threshold, the flatter (more uniform) the access
-      distribution.
+      The closer <replaceable>parameter</> is to 0, the flatter (more uniform)
+      the access distribution.
       A crude approximation of the distribution is that the most frequent 1%
       values in the range, close to <replaceable>min</>, are drawn
-      <replaceable>threshold</>%  of the time.
-      The <replaceable>threshold</> value must be strictly positive.
+      <replaceable>parameter</>% of the time.
+      The value of <replaceable>parameter</> must be strictly positive.
      </para>
 
      <para>
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index f2d435b..5c985f9 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -90,7 +90,7 @@ static int	pthread_join(pthread_t th, void **thread_return);
 #define LOG_STEP_SECONDS	5	/* seconds between log messages */
 #define DEFAULT_NXACTS	10		/* default nxacts */
 
-#define MIN_GAUSSIAN_THRESHOLD		2.0 /* minimum threshold for gauss */
+#define MIN_GAUSSIAN_PARAM		2.0 /* minimum parameter for gauss */
 
 int			nxacts = 0;			/* number of transactions per client */
 int			duration = 0;		/* duration in seconds */
@@ -488,47 +488,47 @@ getrand(TState *thread, int64 min, int64 max)
 
 /*
  * random number generator: exponential distribution from min to max inclusive.
- * the threshold is so that the density of probability for the last cut-off max
- * value is exp(-threshold).
+ * the parameter is such that the density of probability for the last cut-off
+ * max value is exp(-parameter).
  */
 static int64
-getExponentialRand(TState *thread, int64 min, int64 max, double threshold)
+getExponentialRand(TState *thread, int64 min, int64 max, double parameter)
 {
 	double		cut,
 				uniform,
 				rand;
 
-	Assert(threshold > 0.0);
-	cut = exp(-threshold);
+	Assert(parameter > 0.0);
+	cut = exp(-parameter);
 	/* erand in [0, 1), uniform in (0, 1] */
 	uniform = 1.0 - pg_erand48(thread->random_state);
 
 	/*
-	 * inner expresion in (cut, 1] (if threshold > 0), rand in [0, 1)
+	 * inner expression in (cut, 1] (if parameter > 0), rand in [0, 1)
 	 */
 	Assert((1.0 - cut) != 0.0);
-	rand = -log(cut + (1.0 - cut) * uniform) / threshold;
+	rand = -log(cut + (1.0 - cut) * uniform) / parameter;
 	/* return int64 random number within between min and max */
 	return min + (int64) ((max - min + 1) * rand);
 }
 
 /* random number generator: gaussian distribution from min to max inclusive */
 static int64
-getGaussianRand(TState *thread, int64 min, int64 max, double threshold)
+getGaussianRand(TState *thread, int64 min, int64 max, double parameter)
 {
 	double		stdev;
 	double		rand;
 
 	/*
-	 * Get user specified random number from this loop, with -threshold <
-	 * stdev <= threshold
+	 * Get user specified random number from this loop,
+	 * with -parameter <= stdev < parameter
 	 *
 	 * This loop is executed until the number is in the expected range.
 	 *
-	 * As the minimum threshold is 2.0, the probability of looping is low:
+	 * As the minimum parameter is 2.0, the probability of looping is low:
 	 * sqrt(-2 ln(r)) <= 2 => r >= e^{-2} ~ 0.135, then when taking the
 	 * average sinus multiplier as 2/pi, we have a 8.6% looping probability in
-	 * the worst case. For a 5.0 threshold value, the looping probability is
+	 * the worst case. For a parameter value of 5.0, the looping probability is
 	 * about e^{-5} * 2 / pi ~ 0.43%.
 	 */
 	do
@@ -553,10 +553,10 @@ getGaussianRand(TState *thread, int64 min, int64 max, double threshold)
 		 * over.
 		 */
 	}
-	while (stdev < -threshold || stdev >= threshold);
+	while (stdev < -parameter || stdev >= parameter);
 
-	/* stdev is in [-threshold, threshold), normalization to [0,1) */
-	rand = (stdev + threshold) / (threshold * 2.0);
+	/* stdev is in [-parameter, parameter), normalization to [0,1) */
+	rand = (stdev + parameter) / (parameter * 2.0);
 
 	/* return int64 random number within between min and max */
 	return min + (int64) ((max - min + 1) * rand);
@@ -1483,7 +1483,7 @@ top:
 			char	   *var;
 			int64		min,
 						max;
-			double		threshold = 0;
+			double		parameter = 0;
 			char		res[64];
 
 			if (*argv[2] == ':')
@@ -1554,41 +1554,49 @@ top:
 				{
 					if ((var = getVariable(st, argv[5] + 1)) == NULL)
 					{
-						fprintf(stderr, "%s: invalid threshold number: \"%s\"\n",
+						fprintf(stderr, "%s: invalid parameter: \"%s\"\n",
 								argv[0], argv[5]);
 						st->ecnt++;
 						return true;
 					}
-					threshold = strtod(var, NULL);
+					parameter = strtod(var, NULL);
 				}
 				else
-					threshold = strtod(argv[5], NULL);
+					parameter = strtod(argv[5], NULL);
 
 				if (pg_strcasecmp(argv[4], "gaussian") == 0)
 				{
-					if (threshold < MIN_GAUSSIAN_THRESHOLD)
+					if (parameter < MIN_GAUSSIAN_PARAM)
 					{
-						fprintf(stderr, "gaussian threshold must be at least %f (not \"%s\")\n", MIN_GAUSSIAN_THRESHOLD, argv[5]);
+						fprintf(stderr, "gaussian parameter must be at least %f (not \"%s\")\n", MIN_GAUSSIAN_PARAM, argv[5]);
 						st->ecnt++;
 						return true;
 					}
 #ifdef DEBUG
-					printf("min: " INT64_FORMAT " max: " INT64_FORMAT " random: " INT64_FORMAT "\n", min, max, getGaussianRand(thread, min, max, threshold));
+					printf("min: " INT64_FORMAT " max: " INT64_FORMAT " random: " INT64_FORMAT "\n",
+						   min, max,
+						   getGaussianRand(thread, min, max, parameter));
 #endif
-					snprintf(res, sizeof(res), INT64_FORMAT, getGaussianRand(thread, min, max, threshold));
+					snprintf(res, sizeof(res), INT64_FORMAT,
+							 getGaussianRand(thread, min, max, parameter));
 				}
 				else if (pg_strcasecmp(argv[4], "exponential") == 0)
 				{
-					if (threshold <= 0.0)
+					if (parameter <= 0.0)
 					{
-						fprintf(stderr, "exponential threshold must be greater than zero (not \"%s\")\n", argv[5]);
+						fprintf(stderr,
+							"exponential parameter must be greater than zero (not \"%s\")\n",
+							argv[5]);
 						st->ecnt++;
 						return true;
 					}
 #ifdef DEBUG
-					printf("min: " INT64_FORMAT " max: " INT64_FORMAT " random: " INT64_FORMAT "\n", min, max, getExponentialRand(thread, min, max, threshold));
+					printf("min: " INT64_FORMAT " max: " INT64_FORMAT " random: " INT64_FORMAT "\n",
+						   min, max,
+						   getExponentialRand(thread, min, max, parameter));
 #endif
-					snprintf(res, sizeof(res), INT64_FORMAT, getExponentialRand(thread, min, max, threshold));
+					snprintf(res, sizeof(res), INT64_FORMAT,
+							 getExponentialRand(thread, min, max, parameter));
 				}
 			}
 			else	/* this means an error somewhere in the parsing phase... */
@@ -2282,8 +2290,9 @@ process_commands(char *buf, const char *source, const int lineno)
 		if (pg_strcasecmp(my_commands->argv[0], "setrandom") == 0)
 		{
 			/*
-			 * parsing: \setrandom variable min max [uniform] \setrandom
-			 * variable min max (gaussian|exponential) threshold
+			 * parsing:
+			 *   \setrandom variable min max [uniform]
+			 *   \setrandom variable min max (gaussian|exponential) parameter
 			 */
 
 			if (my_commands->argc < 4)
@@ -2308,7 +2317,7 @@ process_commands(char *buf, const char *source, const int lineno)
 				if (my_commands->argc < 6)
 				{
 					syntax_error(source, lineno, my_commands->line, my_commands->argv[0],
-					 "missing threshold argument", my_commands->argv[4], -1);
+					 "missing parameter", my_commands->argv[4], -1);
 				}
 				else if (my_commands->argc > 6)
 				{
-- 
2.6.3

