Hello Amit,
Attached v18: - remove the test tablespace I had to work around a strange issue around partitioned tables and the default tablespace.- if (tablespace != NULL) + if (tablespace != NULL && strcmp(tablespace, "pg_default") != 0) [...] I don't think we need any such check, rather if the user gives default_tablespace with 'partitions' option, then let it fail with an error "cannot specify default tablespace for partitioned relations".
That is the one I wanted to avoid, which is triggered by TAP tests, but I'm fine with putting back a tablespace. Given partitioned table strange constraints, ISTM desirable to check that it works with options such as tablespace and fillfactor.
(b) Create a non-default tablespace to test partitions with "all possible options" test as you have in your previous version.
Also, add a comment explaining why in that test we are using non-default tablespace.
I am leaning towards approach (b) unless you and or Alvaro feels (a) is good for now or if you have some other idea.
No other idea. I put back the tablespace creation which I just removed, with comments about why it is there.
If we want to go with option (b), I have small comment in your previous test: +# tablespace for testing +my $ts = $node->basedir . '/regress_pgbench_tap_1_ts_dir'; +mkdir $ts or die "cannot create directory $ts"; +my $ets = TestLib::perl2host($ts); +# add needed escaping! +$ets =~ s/'/''/; I am not sure if we really need this quote skipping stuff. Why can't we write the test as below: # tablespace for testing my $basedir = $node->basedir; my $ts = "$basedir/regress_pgbench_tap_1_ts_dir"; mkdir $ts or die "cannot create directory $ts"; $ts = TestLib::perl2host($ts); $node->safe_psql('postgres', "CREATE TABLESPACE regress_pgbench_tap_1_ts LOCATION '$ets';"
I think that this last command fails if the path contains a "'", so the '-escaping is necessary. I had to make changes in TAP tests before because it was not working when the path was a little bit strange, so now I'm careful.
Attached v19: - put back a local tablespace plus comments - remove the pg_default doubtful workaround. -- Fabien.
diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml index c857aa3cba..e3a0abb4c7 100644 --- a/doc/src/sgml/ref/pgbench.sgml +++ b/doc/src/sgml/ref/pgbench.sgml @@ -306,6 +306,31 @@ pgbench <optional> <replaceable>options</replaceable> </optional> <replaceable>d </listitem> </varlistentry> + <varlistentry> + <term><option>--partitions=<replaceable>NUM</replaceable></option></term> + <listitem> + <para> + Create a partitioned <literal>pgbench_accounts</literal> table with + <replaceable>NUM</replaceable> partitions of nearly equal size for + the scaled number of accounts. + Default is <literal>0</literal>, meaning no partitioning. + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><option>--partition-method=<replaceable>NAME</replaceable></option></term> + <listitem> + <para> + Create a partitioned <literal>pgbench_accounts</literal> table with + <replaceable>NAME</replaceable> method. + Expected values are <literal>range</literal> or <literal>hash</literal>. + This option requires that <option>--partitions</option> is set to non-zero. + If unspecified, default is <literal>range</literal>. + </para> + </listitem> + </varlistentry> + <varlistentry> <term><option>--tablespace=<replaceable>tablespace</replaceable></option></term> <listitem> diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c index ed7652bfbf..d71e38b8a8 100644 --- a/src/bin/pgbench/pgbench.c +++ b/src/bin/pgbench/pgbench.c @@ -186,6 +186,25 @@ int64 latency_limit = 0; char *tablespace = NULL; char *index_tablespace = NULL; +/* + * Number of "pgbench_accounts" partitions, found or to create. + * When creating, 0 is the default and means no partitioning. + * When running, this is the actual number of partitions. + */ +static int partitions = 0; + +/* partitioning strategy for "pgbench_accounts" */ +typedef enum +{ + PART_NONE, /* no partitioning */ + PART_RANGE, /* range partitioning */ + PART_HASH /* hash partitioning */ +} + partition_method_t; + +static partition_method_t partition_method = PART_NONE; +static const char *PARTITION_METHOD[] = {"none", "range", "hash"}; + /* random seed used to initialize base_random_sequence */ int64 random_seed = -1; @@ -617,6 +636,9 @@ usage(void) " --foreign-keys create foreign key constraints between tables\n" " --index-tablespace=TABLESPACE\n" " create indexes in the specified tablespace\n" + " --partitions=NUM partition pgbench_accounts in NUM parts (default: 0)\n" + " --partition-method=(range|hash)\n" + " partition pgbench_accounts with this method (default: range)\n" " --tablespace=TABLESPACE create tables in the specified tablespace\n" " --unlogged-tables create tables as unlogged tables\n" "\nOptions to select what to run:\n" @@ -3601,6 +3623,89 @@ initDropTables(PGconn *con) "pgbench_tellers"); } +/* + * add fillfactor percent option. + * + * As default is 100, it could be removed in this case. + */ +static void +append_fillfactor(char *opts, int len) +{ + snprintf(opts + strlen(opts), len - strlen(opts), + " with (fillfactor=%d)", fillfactor); +} + +/* + * Create "pgbench_accounts" partitions if needed. + * + * This is the larger table of pgbench default tpc-b like schema + * with a known size, so that it can be partitioned by range. + */ +static void +createPartitions(PGconn *con) +{ + char ff[64]; + + ff[0] = '\0'; + + /* + * Per ddlinfo in initCreateTables below, fillfactor is needed on + * table pgbench_accounts. + */ + append_fillfactor(ff, sizeof(ff)); + + /* we must have to create some partitions */ + Assert(partitions > 0); + + fprintf(stderr, "creating %d partitions...\n", partitions); + + for (int p = 1; p <= partitions; p++) + { + char query[256]; + + if (partition_method == PART_RANGE) + { + int64 part_size = (naccounts * (int64) scale + partitions - 1) / partitions; + char minvalue[32], + maxvalue[32]; + + /* + * For RANGE, we use open-ended partitions at the beginning and + * end to allow any valid value for the primary key. + * Although the actual minimum and maximum values can be derived + * from the scale, it is more generic and the performance is better. + */ + if (p == 1) + sprintf(minvalue, "minvalue"); + else + sprintf(minvalue, INT64_FORMAT, (p - 1) * part_size + 1); + + if (p < partitions) + sprintf(maxvalue, INT64_FORMAT, p * part_size + 1); + else + sprintf(maxvalue, "maxvalue"); + + snprintf(query, sizeof(query), + "create%s table pgbench_accounts_%d\n" + " partition of pgbench_accounts\n" + " for values from (%s) to (%s)%s\n", + unlogged_tables ? " unlogged" : "", p, + minvalue, maxvalue, ff); + } + else if (partition_method == PART_HASH) + snprintf(query, sizeof(query), + "create%s table pgbench_accounts_%d\n" + " partition of pgbench_accounts\n" + " for values with (modulus %d, remainder %d)%s\n", + unlogged_tables ? " unlogged" : "", p, + partitions, p - 1, ff); + else /* cannot get there */ + Assert(0); + + executeStatement(con, query); + } +} + /* * Create pgbench's standard tables */ @@ -3664,9 +3769,15 @@ initCreateTables(PGconn *con) /* Construct new create table statement. */ opts[0] = '\0'; - if (ddl->declare_fillfactor) + + /* Partition pgbench_accounts table */ + if (partition_method != PART_NONE && strcmp(ddl->table, "pgbench_accounts") == 0) snprintf(opts + strlen(opts), sizeof(opts) - strlen(opts), - " with (fillfactor=%d)", fillfactor); + " partition by %s (aid)", PARTITION_METHOD[partition_method]); + else if (ddl->declare_fillfactor) + /* fillfactor is only expected on actual tables */ + append_fillfactor(opts, sizeof(opts)); + if (tablespace != NULL) { char *escape_tablespace; @@ -3686,6 +3797,9 @@ initCreateTables(PGconn *con) executeStatement(con, buffer); } + + if (partition_method != PART_NONE) + createPartitions(con); } /* @@ -4919,6 +5033,10 @@ printResults(StatsData *total, instr_time total_time, printf("transaction type: %s\n", num_scripts == 1 ? sql_script[0].desc : "multiple scripts"); printf("scaling factor: %d\n", scale); + /* only print partitioning information if some partitioning was detected */ + if (partition_method != PART_NONE) + printf("partition method: %s\npartitions: %d\n", + PARTITION_METHOD[partition_method], partitions); printf("query mode: %s\n", QUERYMODE[querymode]); printf("number of clients: %d\n", nclients); printf("number of threads: %d\n", nthreads); @@ -5083,6 +5201,122 @@ set_random_seed(const char *seed) return true; } +/* + * Extract pgbench table informations into global variables scale, + * partition_method and partitions. + */ +static void +GetTableInfo(PGconn *con, bool scale_given) +{ + PGresult *res; + + /* + * get the scaling factor that should be same as count(*) from + * pgbench_branches if this is not a custom query + */ + res = PQexec(con, "select count(*) from pgbench_branches"); + if (PQresultStatus(res) != PGRES_TUPLES_OK) + { + char *sqlState = PQresultErrorField(res, PG_DIAG_SQLSTATE); + + fprintf(stderr, "%s", PQerrorMessage(con)); + if (sqlState && strcmp(sqlState, ERRCODE_UNDEFINED_TABLE) == 0) + { + fprintf(stderr, "Perhaps you need to do initialization (\"pgbench -i\") in database \"%s\"\n", PQdb(con)); + } + + exit(1); + } + scale = atoi(PQgetvalue(res, 0, 0)); + if (scale < 0) + { + fprintf(stderr, "invalid count(*) from pgbench_branches: \"%s\"\n", + PQgetvalue(res, 0, 0)); + exit(1); + } + PQclear(res); + + /* warn if we override user-given -s switch */ + if (scale_given) + fprintf(stderr, + "scale option ignored, using count from pgbench_branches table (%d)\n", + scale); + + /* + * Get the partition information for the first "pgbench_accounts" table + * found in search_path. + * + * The result is empty if no "pgbench_accounts" is found. + * + * Otherwise, it always returns one row even if the table is not + * partitioned (in which case the partition strategy is NULL). + * + * The number of partitions can be 0 even for partitioned tables, if no + * partition are attached. + * + * We Assume no partitioning on any failure, so as to avoid failing on + * an old version without "pg_partitioned_table". + */ + res = PQexec(con, + "select o.n, p.partstrat, pg_catalog.count(i.inhparent) " + "from pg_catalog.pg_class as c " + "join pg_catalog.pg_namespace as n on (n.oid = c.relnamespace) " + "cross join lateral (select pg_catalog.array_position(pg_catalog.current_schemas(true), n.nspname)) as o(n) " + "left join pg_catalog.pg_partitioned_table as p on (p.partrelid = c.oid) " + "left join pg_catalog.pg_inherits as i on (c.oid = i.inhparent) " + "where c.relname = 'pgbench_accounts' and o.n is not null " + "group by 1, 2 " + "order by 1 asc " + "limit 1"); + + if (PQresultStatus(res) != PGRES_TUPLES_OK) + { + /* probably an older version, coldly assume no partitioning */ + partition_method = PART_NONE; + partitions = 0; + } + else if (PQntuples(res) == 0) + { + /* + * This case is unlikely as pgbench already found "pgbench_branches" + * above to compute the scale. + */ + fprintf(stderr, + "no pgbench_accounts table found in search_path\n" + "Perhaps you need to do initialization (\"pgbench -i\") in database \"%s\"\n", PQdb(con)); + exit(1); + } + else /* PQntupes(res) == 1 */ + { + /* normal case, extract partition information */ + if (PQgetisnull(res, 0, 1)) + partition_method = PART_NONE; + else + { + char *ps = PQgetvalue(res, 0, 1); + + /* column must be there */ + Assert(ps != NULL); + + if (strcmp(ps, "r") == 0) + partition_method = PART_RANGE; + else if (strcmp(ps, "h") == 0) + partition_method = PART_HASH; + else + { + /* possibly a newer version with new partition method */ + fprintf(stderr, "unexpected partition method: \"%s\"\n", ps); + exit(1); + } + } + + partitions = atoi(PQgetvalue(res, 0, 2)); + } + + PQclear(res); +} + + int main(int argc, char **argv) { @@ -5126,6 +5360,8 @@ main(int argc, char **argv) {"foreign-keys", no_argument, NULL, 8}, {"random-seed", required_argument, NULL, 9}, {"show-script", required_argument, NULL, 10}, + {"partitions", required_argument, NULL, 11}, + {"partition-method", required_argument, NULL, 12}, {NULL, 0, NULL, 0} }; @@ -5160,7 +5396,6 @@ main(int argc, char **argv) #endif PGconn *con; - PGresult *res; char *env; int exit_code = 0; @@ -5486,6 +5721,29 @@ main(int argc, char **argv) exit(0); } break; + case 11: /* partitions */ + initialization_option_set = true; + partitions = atoi(optarg); + if (partitions < 0) + { + fprintf(stderr, "invalid number of partitions: \"%s\"\n", + optarg); + exit(1); + } + break; + case 12: /* partition-method */ + initialization_option_set = true; + if (pg_strcasecmp(optarg, "range") == 0) + partition_method = PART_RANGE; + else if (pg_strcasecmp(optarg, "hash") == 0) + partition_method = PART_HASH; + else + { + fprintf(stderr, "invalid partition method, expecting \"range\" or \"hash\"," + " got: \"%s\"\n", optarg); + exit(1); + } + break; default: fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); exit(1); @@ -5567,6 +5825,16 @@ main(int argc, char **argv) exit(1); } + if (partitions == 0 && partition_method != PART_NONE) + { + fprintf(stderr, "--partition-method requires greater than zero --partitions\n"); + exit(1); + } + + /* set default method */ + if (partitions > 0 && partition_method == PART_NONE) + partition_method = PART_RANGE; + if (initialize_steps == NULL) initialize_steps = pg_strdup(DEFAULT_INIT_STEPS); @@ -5724,39 +5992,7 @@ main(int argc, char **argv) } if (internal_script_used) - { - /* - * get the scaling factor that should be same as count(*) from - * pgbench_branches if this is not a custom query - */ - res = PQexec(con, "select count(*) from pgbench_branches"); - if (PQresultStatus(res) != PGRES_TUPLES_OK) - { - char *sqlState = PQresultErrorField(res, PG_DIAG_SQLSTATE); - - fprintf(stderr, "%s", PQerrorMessage(con)); - if (sqlState && strcmp(sqlState, ERRCODE_UNDEFINED_TABLE) == 0) - { - fprintf(stderr, "Perhaps you need to do initialization (\"pgbench -i\") in database \"%s\"\n", PQdb(con)); - } - - exit(1); - } - scale = atoi(PQgetvalue(res, 0, 0)); - if (scale < 0) - { - fprintf(stderr, "invalid count(*) from pgbench_branches: \"%s\"\n", - PQgetvalue(res, 0, 0)); - exit(1); - } - PQclear(res); - - /* warn if we override user-given -s switch */ - if (scale_given) - fprintf(stderr, - "scale option ignored, using count from pgbench_branches table (%d)\n", - scale); - } + GetTableInfo(con, scale_given); /* * :scale variables normally get -s or database scale, but don't override diff --git a/src/bin/pgbench/t/001_pgbench_with_server.pl b/src/bin/pgbench/t/001_pgbench_with_server.pl index b82d3f65c4..98e170e0c9 100644 --- a/src/bin/pgbench/t/001_pgbench_with_server.pl +++ b/src/bin/pgbench/t/001_pgbench_with_server.pl @@ -58,6 +58,20 @@ sub pgbench return; } +# tablespace for testing, because partitioned tables cannot use pg_default +# explicitely and we want to test that table creation with tablespace works +# for partitioned tables. +my $ts = $node->basedir . '/regress_pgbench_tap_1_ts_dir'; +mkdir $ts or die "cannot create directory $ts"; +# this takes care of WIN-specific path issues +my $ets = TestLib::perl2host($ts); +# add SQL single-quote string escaping, in case the path contains a quote +$ets =~ s/'/''/; + +$node->safe_psql('postgres', + "CREATE TABLESPACE regress_pgbench_tap_1_ts LOCATION '$ets';" +); + # Test concurrent OID generation via pg_enum_oid_index. This indirectly # exercises LWLock and spinlock concurrency. my $labels = join ',', map { "'l$_'" } 1 .. 1000; @@ -100,12 +114,13 @@ pgbench( # Again, with all possible options pgbench( - '--initialize --init-steps=dtpvg --scale=1 --unlogged-tables --fillfactor=98 --foreign-keys --quiet --tablespace=pg_default --index-tablespace=pg_default', + '--initialize --init-steps=dtpvg --scale=1 --unlogged-tables --fillfactor=98 --foreign-keys --quiet --tablespace=regress_pgbench_tap_1_ts --index-tablespace=regress_pgbench_tap_1_ts --partitions=2 --partition-method=hash', 0, [qr{^$}i], [ qr{dropping old tables}, qr{creating tables}, + qr{creating 2 partitions}, qr{vacuuming}, qr{creating primary keys}, qr{creating foreign keys}, @@ -116,12 +131,13 @@ pgbench( # Test interaction of --init-steps with legacy step-selection options pgbench( - '--initialize --init-steps=dtpvgvv --no-vacuum --foreign-keys --unlogged-tables', + '--initialize --init-steps=dtpvgvv --no-vacuum --foreign-keys --unlogged-tables --partitions=3', 0, [qr{^$}], [ qr{dropping old tables}, qr{creating tables}, + qr{creating 3 partitions}, qr{creating primary keys}, qr{.* of .* tuples \(.*\) done}, qr{creating foreign keys}, @@ -910,5 +926,6 @@ check_pgbench_logs($bdir, '001_pgbench_log_3', 1, 10, 10, qr{^\d \d{1,2} \d+ \d \d+ \d+$}); # done +$node->safe_psql('postgres', 'DROP TABLESPACE regress_pgbench_tap_1_ts'); $node->stop; done_testing(); diff --git a/src/bin/pgbench/t/002_pgbench_no_server.pl b/src/bin/pgbench/t/002_pgbench_no_server.pl index f7fa18418b..1e9542af3f 100644 --- a/src/bin/pgbench/t/002_pgbench_no_server.pl +++ b/src/bin/pgbench/t/002_pgbench_no_server.pl @@ -157,6 +157,13 @@ my @options = ( qr{error while setting random seed from --random-seed option} ] ], + [ 'bad partition type', '-i --partition-method=BAD', [qr{"range"}, qr{"hash"}, qr{"BAD"}] ], + [ 'bad partition number', '-i --partitions -1', [ qr{invalid number of partitions: "-1"} ] ], + [ + 'partition method without partitioning', + '-i --partition-method=hash', + [ qr{partition-method requires greater than zero --partitions} ] + ], # logging sub-options [