Seems like a nice addition, but something isn't quite right; with '-s 50' (no unit)
I get: 'scale 50 too small':
Sigh. Indeed, it seems that I forgot to test some cases... Thanks
for the debugging. Here is a hopefully better attempt.
I also upgraded the regression test to test more than "-s 1". I also tried
to improve the documentation to point out that the result is an
approximation.
--
Fabien.
diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml
index 3dd492c..82343ed 100644
--- a/doc/src/sgml/ref/pgbench.sgml
+++ b/doc/src/sgml/ref/pgbench.sgml
@@ -49,7 +49,7 @@
<screen>
transaction type: <builtin: TPC-B (sort of)>
-scaling factor: 10
+scaling factor: 10 (about 150 MiB)
query mode: simple
number of clients: 10
number of threads: 1
@@ -282,6 +282,21 @@ pgbench <optional> <replaceable>options</replaceable> </optional> <replaceable>d
in order to be big enough to hold the range of account
identifiers.
</para>
+
+ <para>
+ The scale can also be specified as an expected database size by
+ specifying a unit, assuming around 15 MiB size increments per scale
+ unit.
+ For instance, <literal>-s 5G</literal> will approximate the scale
+ required for a 5 GiB database.
+ Note that the size to scale conversion is based on a regression which
+ entails some errors, especially for small scales.
+ Moreover, the actual database size may vary depending on options.
+ Allowed units are IEC 1024 powers (<literal>KiB MiB GiB TiB PiB</literal>)
+ and SI 1000 powers (<literal>kB MB GB TB PB</literal>); for convenience,
+ the bare prefixes <literal>K M G T P</literal> and <literal>KB</literal> are
+ aliases for the IEC binary sizes, and <literal>B</literal> means bytes.
+ </para>
</listitem>
</varlistentry>
@@ -1600,7 +1615,7 @@ END;
<screen>
starting vacuum...end.
transaction type: <builtin: TPC-B (sort of)>
-scaling factor: 1
+scaling factor: 1 (about 15 MiB)
query mode: simple
number of clients: 10
number of threads: 1
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index d420942..e3f950a 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -524,7 +524,7 @@ usage(void)
" -F, --fillfactor=NUM set fill factor\n"
" -n, --no-vacuum do not run VACUUM during initialization\n"
" -q, --quiet quiet logging (one message each 5 seconds)\n"
- " -s, --scale=NUM scaling factor\n"
+ " -s, --scale=NUM|SIZE scaling factor or expected database size\n"
" --foreign-keys create foreign key constraints between tables\n"
" --index-tablespace=TABLESPACE\n"
" create indexes in the specified tablespace\n"
@@ -552,7 +552,7 @@ usage(void)
" -P, --progress=NUM show thread progress report every NUM seconds\n"
" -r, --report-latencies report average latency per command\n"
" -R, --rate=NUM target rate in transactions per second\n"
- " -s, --scale=NUM report this scale factor in output\n"
+ " -s, --scale=NUM|SIZE report this scale factor in output\n"
" -t, --transactions=NUM number of transactions each client runs (default: 10)\n"
" -T, --time=NUM duration of benchmark test in seconds\n"
" -v, --vacuum-all vacuum all four standard tables before tests\n"
@@ -668,6 +668,100 @@ gotdigits:
return ((sign < 0) ? -result : result);
}
+/*
+ * Parse a size such as "5G", "300MiB" or "1024" and return it in bytes.
+ *
+ * The string must be an integer optionally followed by one of the unit
+ * suffixes listed in UNITS below; without a suffix the number is taken as
+ * bytes.  error_message names the value being parsed and is only used to
+ * build the error report.
+ *
+ * On invalid input (no digits before the unit, or a numeric part rejected
+ * by is_an_int) a message is printed on stderr and the program exits with
+ * status 1.
+ *
+ * s is modified temporarily to cut off the suffix; it is always restored
+ * before returning or reporting an error, so the message shows the string
+ * as the user typed it.
+ *
+ * NOTE(review): the multiplication is not checked for int64 overflow.
+ */
+static int64
+parse_size(char *s, const char *error_message)
+{
+    static const struct { const char *name; int64 multiplier; }
+    UNITS[] = {
+        /* IEC units */
+        { "KiB", 1024 },
+        { "MiB", 1024 * 1024 },
+        { "GiB", 1024 * 1024 * 1024 },
+        { "TiB", (int64) 1024 * 1024 * 1024 * 1024 },
+        { "PiB", (int64) 1024 * 1024 * 1024 * 1024 * 1024 },
+        /* SI units */
+        { "kB", 1000 },
+        { "MB", 1000 * 1000 },
+        { "GB", 1000 * 1000 * 1000 },
+        { "TB", (int64) 1000 * 1000 * 1000 * 1000 },
+        { "PB", (int64) 1000 * 1000 * 1000 * 1000 * 1000 },
+        /* common/convenient JEDEC usage */
+        { "KB", 1024 },
+        { "K", 1024 },
+        { "M", 1024 * 1024 },
+        { "G", 1024 * 1024 * 1024 },
+        { "T", (int64) 1024 * 1024 * 1024 * 1024 },
+        { "P", (int64) 1024 * 1024 * 1024 * 1024 * 1024 },
+        /* plain bytes; must stay last, it is the default and the shortest suffix */
+        { "B", 1 },
+    };
+
+    size_t      len = strlen(s);
+    size_t      numlen = len;       /* length of the numeric part of s */
+    size_t      unit;
+    int64       size = 0;
+    char        saved;
+    int         ok;
+
+    /*
+     * Look for a unit suffix.  Every name that ends with "B" is listed
+     * before "B" itself, so the first match is the longest one.  Names
+     * longer than the input are skipped: comparing them would start
+     * reading before the beginning of s.
+     */
+    for (unit = 0; unit < lengthof(UNITS); unit++)
+    {
+        size_t      ulen = strlen(UNITS[unit].name);
+
+        if (ulen <= len && strcmp(s + len - ulen, UNITS[unit].name) == 0)
+        {
+            numlen = len - ulen;
+            break;
+        }
+    }
+
+    /* no suffix found: assume bytes, which is the last entry */
+    if (unit == lengthof(UNITS))
+        unit = lengthof(UNITS) - 1;
+
+    /* cut the suffix off while the numeric part is checked and converted */
+    saved = s[numlen];
+    s[numlen] = '\0';
+
+    /* a unit alone, with no digits in front of it, is not a size */
+    ok = (numlen > 0 && is_an_int(s));
+    if (ok)
+        size = strtoint64(s) * UNITS[unit].multiplier;
+
+    /* put the caller's string back before it is used or printed */
+    s[numlen] = saved;
+
+    if (!ok)
+    {
+        fprintf(stderr, "invalid %s: \"%s\"\n", error_message, s);
+        exit(1);
+    }
+
+    return size;
+}
+
+/*
+ * Parse a scale argument and return a scale factor of at least 1.
+ *
+ * A plain integer is taken as the scale itself.  Anything else is handed
+ * to parse_size() as an expected database size in bytes and converted to
+ * the approximate scale needed to reach that size.
+ *
+ * A result that is zero or negative is reported on stderr and rounded up
+ * to 1.  A size whose scale does not fit in an int is reported on stderr
+ * and the program exits with status 1.
+ */
+static int
+parse_scale(char *s)
+{
+    int         scale;
+
+    if (is_an_int(s))
+    {
+        /* standard scaling */
+        scale = atoi(s);
+    }
+    else
+    {
+        /* try size scaling */
+        int64       size = parse_size(s, "scaling factor");
+        int64       wide;
+
+        /*
+         * formula from Kaarel Moppel linear regression on pg 10.1,
+         * which gives about 15 MiB per pgbench scale unit
+         *
+         * The result is computed as int64 first: converting a double
+         * that is out of range for int directly would be undefined,
+         * whereas any int64 size yields a value that fits in int64.
+         */
+        wide = (int64) ceil(0.066888816 * size / (1024 * 1024) - 0.511799076);
+
+        /* narrowing is implementation-defined, so detect a changed value */
+        scale = (int) wide;
+        if (scale != wide)
+        {
+            fprintf(stderr, "scaling factor \"%s\" out of range\n", s);
+            exit(1);
+        }
+    }
+
+    if (scale <= 0)
+    {
+        fprintf(stderr, "scale %s too small, rounded to 1\n", s);
+        scale = 1;
+    }
+
+    return scale;
+}
+
/* random number generator: uniform distribution from min to max inclusive */
static int64
getrand(TState *thread, int64 min, int64 max)
@@ -4244,7 +4338,8 @@ printResults(TState *threads, StatsData *total, instr_time total_time,
/* Report test parameters. */
printf("transaction type: %s\n",
num_scripts == 1 ? sql_script[0].desc : "multiple scripts");
- printf("scaling factor: %d\n", scale);
+ /* scale to MiB evaluation must be consistent with parse_scale */
+ printf("scaling factor: %d (about %.0f MiB)\n", scale, 14.95 * scale);
printf("query mode: %s\n", QUERYMODE[querymode]);
printf("number of clients: %d\n", nclients);
printf("number of threads: %d\n", nthreads);
@@ -4560,12 +4655,7 @@ main(int argc, char **argv)
break;
case 's':
scale_given = true;
- scale = atoi(optarg);
- if (scale <= 0)
- {
- fprintf(stderr, "invalid scaling factor: \"%s\"\n", optarg);
- exit(1);
- }
+ scale = parse_scale(optarg);
break;
case 't':
benchmarking_option_set = true;
diff --git a/src/bin/pgbench/t/001_pgbench_with_server.pl b/src/bin/pgbench/t/001_pgbench_with_server.pl
index 99286f6..49e0ce3 100644
--- a/src/bin/pgbench/t/001_pgbench_with_server.pl
+++ b/src/bin/pgbench/t/001_pgbench_with_server.pl
@@ -81,7 +81,7 @@ pgbench(
# Again, with all possible options
pgbench(
-'--initialize --init-steps=dtpvg --scale=1 --unlogged-tables --fillfactor=98 --foreign-keys --quiet --tablespace=pg_default --index-tablespace=pg_default',
+'--initialize --init-steps=dtpvg --scale=30M --unlogged-tables --fillfactor=98 --foreign-keys --quiet --tablespace=pg_default --index-tablespace=pg_default',
0,
[qr{^$}i],
[ qr{dropping old tables},
@@ -89,6 +89,7 @@ pgbench(
qr{vacuuming},
qr{creating primary keys},
qr{creating foreign keys},
+ qr{200000 of 200000 tuples}, # scale 2
qr{done\.} ],
'pgbench scale 1 initialization');
diff --git a/src/bin/pgbench/t/002_pgbench_no_server.pl b/src/bin/pgbench/t/002_pgbench_no_server.pl
index 6ea55f8..52d135c 100644
--- a/src/bin/pgbench/t/002_pgbench_no_server.pl
+++ b/src/bin/pgbench/t/002_pgbench_no_server.pl
@@ -44,6 +44,7 @@ my @options = (
[ 'bad #threads', '-j eleven', [qr{invalid number of threads: "eleven"}]
],
[ 'bad scale', '-i -s two', [qr{invalid scaling factor: "two"}] ],
+ [ 'bad scale size', '-i -s 2stuff', [qr{invalid scaling factor: "2stuff"}] ],
[ 'invalid #transactions',
'-t zil',
[qr{invalid number of transactions: "zil"}] ],