Hi All, Having pg_dump dump tables in clustered index order is something we've found we've needed a fair number of times (for ex. when copying a large logging tables or sets of tables out of one database where the order is not maintained into another for running a bunch of backend analysis) as it saves us the clustering step which is often longer then the copy step itself.
I wanted to gauge the interest in adding an option for this to pg_dump. A (not production ready) patch that we've been using off of the 9.1.2 tag to implement this is attached or can be viewed here<https://github.com/tgarnett/postgres/commit/d4412aa4047e7a0822ee93fa47a1c0d282cb7925>. It adds a --cluster-order option to pg_dump. If people have any suggestions on better ways of pulling out the order clause or other improvements that would be great too. Tim
From d4412aa4047e7a0822ee93fa47a1c0d282cb7925 Mon Sep 17 00:00:00 2001 From: Timothy Garnett <tgarn...@panjiva.com> Date: Fri, 10 Feb 2012 16:21:32 -0500 Subject: [PATCH] Support for pg_dump to dump tables in cluster order if a clustered index is defined on the table, a little hacked in with how the data is passed around and how the order is pulled out of the db. The latter is the only semi-problematic part as you might be able to generate (very odd) table column names that would break the regex used there which would cause the sql query to be invalid and therefore not dump data for that table. But as long as you don't name an clustered column/function something like "foo ) WHERE" or the like should be ok. --- src/bin/pg_dump/pg_dump.c | 48 +++++++++++++++++++++++++++++++++++++++++--- src/bin/pg_dump/pg_dump.h | 1 + 2 files changed, 45 insertions(+), 4 deletions(-) diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index 57f2ed3..9ef9a71 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -134,6 +134,7 @@ static int binary_upgrade = 0; static int disable_dollar_quoting = 0; static int dump_inserts = 0; +static int cluster_order = 0; static int column_inserts = 0; static int no_security_labels = 0; static int no_unlogged_table_data = 0; @@ -319,6 +320,7 @@ static void check_sql_result(PGresult *res, PGconn *conn, const char *query, */ {"attribute-inserts", no_argument, &column_inserts, 1}, {"binary-upgrade", no_argument, &binary_upgrade, 1}, + {"cluster-order", no_argument, &cluster_order, 1}, {"column-inserts", no_argument, &column_inserts, 1}, {"disable-dollar-quoting", no_argument, &disable_dollar_quoting, 1}, {"disable-triggers", no_argument, &disable_triggers, 1}, @@ -849,6 +851,7 @@ static void check_sql_result(PGresult *res, PGconn *conn, const char *query, printf(_(" -T, --exclude-table=TABLE do NOT dump the named table(s)\n")); printf(_(" -x, --no-privileges do not dump privileges (grant/revoke)\n")); printf(_(" --binary-upgrade for use by upgrade utilities only\n")); + printf(_(" --cluster-order dump table data in clustered index order (>= 8.2)\n")); printf(_(" --column-inserts dump data as INSERT commands with column names\n")); printf(_(" --disable-dollar-quoting disable dollar quoting, use SQL standard quoting\n")); printf(_(" --disable-triggers disable triggers during data-only restore\n")); @@ -1245,7 +1248,7 @@ static void check_sql_result(PGresult *res, PGconn *conn, const char *query, classname), column_list); } - else if (tdinfo->filtercond) + else if (tdinfo->filtercond || tbinfo->ordercond) { /* Note: this syntax is only supported in 8.2 and up */ appendPQExpBufferStr(q, "COPY (SELECT "); @@ -1257,10 +1260,14 @@ static void check_sql_result(PGresult *res, PGconn *conn, const char *query, } else appendPQExpBufferStr(q, "* "); - appendPQExpBuffer(q, "FROM %s %s) TO stdout;", + appendPQExpBuffer(q, "FROM %s ", fmtQualifiedId(tbinfo->dobj.namespace->dobj.name, - classname), - tdinfo->filtercond); + classname)); + if (tdinfo->filtercond) + appendPQExpBuffer(q, "%s ", tdinfo->filtercond); + if (tbinfo->ordercond) + appendPQExpBuffer(q, "%s", tbinfo->ordercond); + appendPQExpBuffer(q, ") TO stdout;"); } else { @@ -1388,6 +1395,8 @@ static void check_sql_result(PGresult *res, PGconn *conn, const char *query, } if (tdinfo->filtercond) appendPQExpBuffer(q, " %s", tdinfo->filtercond); + if (tbinfo->ordercond) + appendPQExpBuffer(q, " %s", tbinfo->ordercond); res = PQexec(g_conn, q->data); check_sql_result(res, g_conn, q->data, PGRES_COMMAND_OK); @@ -4400,6 +4409,7 @@ static void check_sql_result(PGresult *res, PGconn *conn, const char *query, i_oid, i_indexname, i_indexdef, + i_indexdeforderclause, i_indnkeys, i_indkey, i_indisclustered, @@ -4451,6 +4461,14 @@ static void check_sql_result(PGresult *res, PGconn *conn, const char *query, "SELECT t.tableoid, t.oid, " "t.relname AS indexname, " "pg_catalog.pg_get_indexdef(i.indexrelid) AS indexdef, " + /* + * FIXME - regex is not the best way to to do this, but + * indexdef is consistent enough that it should work + * Any missing corner cases worth worrying about? + * A column named "foo ) WITH" would be problematic... + * but would need a parser to cover all possible cases + */ + "CASE WHEN i.indisclustered THEN 'ORDER BY ' || regexp_replace(pg_catalog.pg_get_indexdef(i.indexrelid), E'^CREATE (UNIQUE )?INDEX (CONCURRENTLY )?.* ON .* USING [^\\\\(]*\\\\((.*?)\\\\)($| WITH .*| WHERE .*| TABLESPACE .*)', E'\\\\3') ELSE NULL END AS indexdeforderclause, " "t.relnatts AS indnkeys, " "i.indkey, i.indisclustered, " "c.contype, c.conname, " @@ -4476,6 +4494,14 @@ static void check_sql_result(PGresult *res, PGconn *conn, const char *query, "SELECT t.tableoid, t.oid, " "t.relname AS indexname, " "pg_catalog.pg_get_indexdef(i.indexrelid) AS indexdef, " + /* + * FIXME - regex is not the best way to to do this, but + * indexdef is consistent enough that it should work + * Any missing corner cases worth worrying about? + * A column named "foo ) WITH" would be problematic... + * but would need a parser to cover all possible cases + */ + "CASE WHEN i.indisclustered THEN 'ORDER BY ' || regexp_replace(pg_catalog.pg_get_indexdef(i.indexrelid), E'^CREATE (UNIQUE )?INDEX (CONCURRENTLY )?.* ON .* USING [^\\\\(]*\\\\((.*?)\\\\)($| WITH .*| WHERE .*| TABLESPACE .*)', E'\\\\3') ELSE NULL END AS indexdeforderclause, " "t.relnatts AS indnkeys, " "i.indkey, i.indisclustered, " "c.contype, c.conname, " @@ -4504,6 +4530,7 @@ static void check_sql_result(PGresult *res, PGconn *conn, const char *query, "SELECT t.tableoid, t.oid, " "t.relname AS indexname, " "pg_catalog.pg_get_indexdef(i.indexrelid) AS indexdef, " + "NULL AS indexdeforderclause, " "t.relnatts AS indnkeys, " "i.indkey, i.indisclustered, " "c.contype, c.conname, " @@ -4532,6 +4559,7 @@ static void check_sql_result(PGresult *res, PGconn *conn, const char *query, "SELECT t.tableoid, t.oid, " "t.relname AS indexname, " "pg_catalog.pg_get_indexdef(i.indexrelid) AS indexdef, " + "NULL AS indexdeforderclause, " "t.relnatts AS indnkeys, " "i.indkey, i.indisclustered, " "c.contype, c.conname, " @@ -4560,6 +4588,7 @@ static void check_sql_result(PGresult *res, PGconn *conn, const char *query, "SELECT t.tableoid, t.oid, " "t.relname AS indexname, " "pg_get_indexdef(i.indexrelid) AS indexdef, " + "NULL AS indexdeforderclause, " "t.relnatts AS indnkeys, " "i.indkey, false AS indisclustered, " "CASE WHEN i.indisprimary THEN 'p'::char " @@ -4586,6 +4615,7 @@ static void check_sql_result(PGresult *res, PGconn *conn, const char *query, "t.oid, " "t.relname AS indexname, " "pg_get_indexdef(i.indexrelid) AS indexdef, " + "NULL AS indexdeforderclause, " "t.relnatts AS indnkeys, " "i.indkey, false AS indisclustered, " "CASE WHEN i.indisprimary THEN 'p'::char " @@ -4614,6 +4644,7 @@ static void check_sql_result(PGresult *res, PGconn *conn, const char *query, i_oid = PQfnumber(res, "oid"); i_indexname = PQfnumber(res, "indexname"); i_indexdef = PQfnumber(res, "indexdef"); + i_indexdeforderclause = PQfnumber(res, "indexdeforderclause"); i_indnkeys = PQfnumber(res, "indnkeys"); i_indkey = PQfnumber(res, "indkey"); i_indisclustered = PQfnumber(res, "indisclustered"); @@ -4701,6 +4732,15 @@ static void check_sql_result(PGresult *res, PGconn *conn, const char *query, /* Plain secondary index */ indxinfo[j].indexconstraint = 0; } + + /* + * propogate table order clause if clustered index for this + * table and --cluster-order is specified + */ + if (cluster_order && g_fout->remoteVersion >= 80200 && + indxinfo[j].indisclustered && !tbinfo->ordercond && + PQgetvalue(res, j, i_indexdeforderclause)) + tbinfo->ordercond = strdup(PQgetvalue(res, j, i_indexdeforderclause)); } PQclear(res); diff --git a/src/bin/pg_dump/pg_dump.h b/src/bin/pg_dump/pg_dump.h index c95614b..be6b444 100644 --- a/src/bin/pg_dump/pg_dump.h +++ b/src/bin/pg_dump/pg_dump.h @@ -294,6 +294,7 @@ int numParents; /* number of (immediate) parent tables */ struct _tableInfo **parents; /* TableInfos of immediate parents */ struct _tableDataInfo *dataObj; /* TableDataInfo, if dumping its data */ + char *ordercond; /* ORDER condition to order dumped rows, should prob. be in dataObj, but we find it out from the indexes before that's created (though arguably should do it's own query since that's the only way to support --data-only with --cluster-order) */ } TableInfo; typedef struct _attrDefInfo -- 1.7.5.4
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers