Seems like a nice addition, but something isn't quite right: with '-s 50' (no unit) I get 'scale 50 too small':

Sigh. Indeed, it seems that I forgot to test some cases... Thanks for the debugging. Here is a hopefully better attempt.

I also upgraded the regression test to test more than "-s 1". I also tried to improve the documentation to point out that the result is an approximation.

--
Fabien.
diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml
index 3dd492c..82343ed 100644
--- a/doc/src/sgml/ref/pgbench.sgml
+++ b/doc/src/sgml/ref/pgbench.sgml
@@ -49,7 +49,7 @@
 
 <screen>
 transaction type: &lt;builtin: TPC-B (sort of)&gt;
-scaling factor: 10
+scaling factor: 10 (about 150 MiB)
 query mode: simple
 number of clients: 10
 number of threads: 1
@@ -282,6 +282,21 @@ pgbench <optional> <replaceable>options</replaceable> </optional> <replaceable>d
         in order to be big enough to hold the range of account
         identifiers.
        </para>
+
+       <para>
+        The scale can also be specified as an expected database size by
+        specifying a unit, assuming around 15 MiB size increments per scale
+        unit.
+        For instance, <literal>-s 5G</literal> will approximate the scale
+        required for a 5 GiB database.
+        Note that the size-to-scale conversion is based on a linear regression
+        and is only approximate, especially for small scales.
+        Moreover, the actual database size may vary depending on options.
+        Allowed units are IEC 1024 powers (<literal>KiB MiB GiB TiB PiB</literal>),
+        SI 1000 powers (<literal>kB MB GB TB PB</literal>), plain bytes
+        (<literal>B</literal>) and, for convenience, <literal>KB K M G T P</literal>
+        as aliases for the IEC binary sizes.
+       </para>
       </listitem>
      </varlistentry>
 
@@ -1600,7 +1615,7 @@ END;
 <screen>
 starting vacuum...end.
 transaction type: &lt;builtin: TPC-B (sort of)&gt;
-scaling factor: 1
+scaling factor: 1 (about 15 MiB)
 query mode: simple
 number of clients: 10
 number of threads: 1
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index d420942..e3f950a 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -524,7 +524,7 @@ usage(void)
 		   "  -F, --fillfactor=NUM     set fill factor\n"
 		   "  -n, --no-vacuum          do not run VACUUM during initialization\n"
 		   "  -q, --quiet              quiet logging (one message each 5 seconds)\n"
-		   "  -s, --scale=NUM          scaling factor\n"
+		   "  -s, --scale=NUM|SIZE     scaling factor or expected database size\n"
 		   "  --foreign-keys           create foreign key constraints between tables\n"
 		   "  --index-tablespace=TABLESPACE\n"
 		   "                           create indexes in the specified tablespace\n"
@@ -552,7 +552,7 @@ usage(void)
 		   "  -P, --progress=NUM       show thread progress report every NUM seconds\n"
 		   "  -r, --report-latencies   report average latency per command\n"
 		   "  -R, --rate=NUM           target rate in transactions per second\n"
-		   "  -s, --scale=NUM          report this scale factor in output\n"
+		   "  -s, --scale=NUM|SIZE     report this scale factor in output\n"
 		   "  -t, --transactions=NUM   number of transactions each client runs (default: 10)\n"
 		   "  -T, --time=NUM           duration of benchmark test in seconds\n"
 		   "  -v, --vacuum-all         vacuum all four standard tables before tests\n"
@@ -668,6 +668,100 @@ gotdigits:
 	return ((sign < 0) ? -result : result);
 }
 
+/* Return the size in bytes of "<integer>[unit]" (bytes if no unit), or exit
+ * with "invalid <error_message>".  s is cut at the unit, then restored. */
+static int64
+parse_size(char * s, const char * error_message)
+{
+	static const struct { const char *name; int64 multiplier; } UNITS[] = {
+			/* IEC units */
+			{ "KiB", 1024 },
+			{ "MiB", 1024 * 1024 },
+			{ "GiB", 1024 * 1024 * 1024 },
+			{ "TiB", (int64) 1024 * 1024 * 1024 * 1024 },
+			{ "PiB", (int64) 1024 * 1024 * 1024 * 1024 * 1024 },
+			/* SI units */
+			{ "kB", 1000 },
+			{ "MB", 1000 * 1000 },
+			{ "GB", 1000 * 1000 * 1000 },
+			{ "TB", (int64) 1000 * 1000 * 1000 * 1000 },
+			{ "PB", (int64) 1000 * 1000 * 1000 * 1000 * 1000 },
+			/* common/convenient JEDEC usage */
+			{ "KB", 1024 },
+			{ "K", 1024 },
+			{ "M", 1024 * 1024 },
+			{ "G", 1024 * 1024 * 1024 },
+			{ "T", (int64) 1024 * 1024 * 1024 * 1024 },
+			{ "P", (int64) 1024 * 1024 * 1024 * 1024 * 1024 },
+			/* plain bytes: keep last, "B" is also the tail of longer units */
+			{ "B", 1 },
+	};
+
+	size_t	len = strlen(s), last = len, ulen, i;
+	int64	size = 0, multiplier = 1;
+	char	saved;
+	bool	valid;
+
+	/* look for a unit suffix, never reading before the start of s */
+	for (i = 0; i < lengthof(UNITS) && last == len; i++)
+	{
+		ulen = strlen(UNITS[i].name);
+		if (ulen <= len && strcmp(s + len - ulen, UNITS[i].name) == 0)
+		{
+			last = len - ulen;
+			multiplier = UNITS[i].multiplier;
+		}
+	}
+
+	/* check the number alone; a bare unit or an empty string has none */
+	saved = s[last];
+	if (last < len)
+		s[last] = '\0';
+	valid = last > 0 && is_an_int(s);
+	if (valid)
+		size = strtoint64(s) * multiplier;	/* NOTE(review): may overflow */
+	if (last < len)
+		s[last] = saved;
+
+	if (!valid)
+	{
+		fprintf(stderr, "invalid %s: \"%s\"\n", error_message, s);
+		exit(1);
+	}
+	return size;
+}
+
+/* Turn a -s argument, either a plain factor or an expected database size,
+ * into a scale factor of at least 1. */
+static int
+parse_scale(char * s)
+{
+	int		factor;
+
+	if (!is_an_int(s))
+	{
+		/*
+		 * Not a plain integer: read it as an expected database size and
+		 * apply the formula from Kaarel Moppel linear regression on pg 10.1,
+		 * which gives about 15 MiB per pgbench scale unit.
+		 */
+		int64	bytes = parse_size(s, "scaling factor");
+
+		factor = (int) ceil(0.066888816 * bytes / (1024 * 1024) - 0.511799076);
+	}
+	else
+		factor = atoi(s);		/* standard scaling */
+
+	/* warn and fall back to the smallest usable scale */
+	if (factor < 1)
+	{
+		fprintf(stderr, "scale %s too small, rounded to 1\n", s);
+		return 1;
+	}
+
+	return factor;
+}
+
 /* random number generator: uniform distribution from min to max inclusive */
 static int64
 getrand(TState *thread, int64 min, int64 max)
@@ -4244,7 +4338,8 @@ printResults(TState *threads, StatsData *total, instr_time total_time,
 	/* Report test parameters. */
 	printf("transaction type: %s\n",
 		   num_scripts == 1 ? sql_script[0].desc : "multiple scripts");
-	printf("scaling factor: %d\n", scale);
+	/* scale to MiB evaluation must be consistent with parse_scale */
+	printf("scaling factor: %d (about %.0f MiB)\n", scale, 14.95 * scale);
 	printf("query mode: %s\n", QUERYMODE[querymode]);
 	printf("number of clients: %d\n", nclients);
 	printf("number of threads: %d\n", nthreads);
@@ -4560,12 +4655,7 @@ main(int argc, char **argv)
 				break;
 			case 's':
 				scale_given = true;
-				scale = atoi(optarg);
-				if (scale <= 0)
-				{
-					fprintf(stderr, "invalid scaling factor: \"%s\"\n", optarg);
-					exit(1);
-				}
+				scale = parse_scale(optarg);
 				break;
 			case 't':
 				benchmarking_option_set = true;
diff --git a/src/bin/pgbench/t/001_pgbench_with_server.pl b/src/bin/pgbench/t/001_pgbench_with_server.pl
index 99286f6..49e0ce3 100644
--- a/src/bin/pgbench/t/001_pgbench_with_server.pl
+++ b/src/bin/pgbench/t/001_pgbench_with_server.pl
@@ -81,7 +81,7 @@ pgbench(
 
 # Again, with all possible options
 pgbench(
-'--initialize --init-steps=dtpvg --scale=1 --unlogged-tables --fillfactor=98 --foreign-keys --quiet --tablespace=pg_default --index-tablespace=pg_default',
+'--initialize --init-steps=dtpvg --scale=30M --unlogged-tables --fillfactor=98 --foreign-keys --quiet --tablespace=pg_default --index-tablespace=pg_default',
 	0,
 	[qr{^$}i],
 	[   qr{dropping old tables},
@@ -89,6 +89,7 @@ pgbench(
 		qr{vacuuming},
 		qr{creating primary keys},
 		qr{creating foreign keys},
+		qr{200000 of 200000 tuples}, # scale 2
 		qr{done\.} ],
 	'pgbench scale 1 initialization');
 
diff --git a/src/bin/pgbench/t/002_pgbench_no_server.pl b/src/bin/pgbench/t/002_pgbench_no_server.pl
index 6ea55f8..52d135c 100644
--- a/src/bin/pgbench/t/002_pgbench_no_server.pl
+++ b/src/bin/pgbench/t/002_pgbench_no_server.pl
@@ -44,6 +44,7 @@ my @options = (
 	[   'bad #threads', '-j eleven', [qr{invalid number of threads: "eleven"}]
 	],
 	[ 'bad scale', '-i -s two', [qr{invalid scaling factor: "two"}] ],
+	[ 'bad scale size', '-i -s 2stuff', [qr{invalid scaling factor: "2stuff"}] ],
 	[   'invalid #transactions',
 		'-t zil',
 		[qr{invalid number of transactions: "zil"}] ],

Reply via email to