Attached patch implements WHERE clauses for pg_dump. This is useful for producing data samples of a database
e.g. pg_dump -w "ctid < '(1000,1)' or random() < 0.1" and can also be used for taking incremental backups, if data columns exist to make a partial dump sensible. e.g. pg_dump -w "last_update_timestamp > ...." Columns such as this are very common because of optimistic locking techniques in many databases. This is designed to be used in conjunction with the TOM utility, and the forthcoming patch to implement stats hooks. Taken together these features will allow the ability to take a cut-down database environment for testing, yet with statistics matching the main production database. It was easier to write it and then discuss, since I needed to check the feasibility of the idea before presenting it. -- Simon Riggs www.2ndQuadrant.com PostgreSQL Training, Services and Support
Index: doc/src/sgml/ref/pg_dump.sgml =================================================================== RCS file: /home/sriggs/pg/REPOSITORY/pgsql/doc/src/sgml/ref/pg_dump.sgml,v retrieving revision 1.103 diff -c -r1.103 pg_dump.sgml *** doc/src/sgml/ref/pg_dump.sgml 20 Jul 2008 18:43:30 -0000 1.103 --- doc/src/sgml/ref/pg_dump.sgml 25 Jul 2008 08:29:25 -0000 *************** *** 674,679 **** --- 674,696 ---- </varlistentry> <varlistentry> + <term><option>-w <replaceable>SQL where clause</replaceable></option></term> + <term><option>--where=<replaceable class="parameter">SQL where clause</replaceable></option></term> + <listitem> + <para> + Dumps data only for those rows specified. When this parameter is not + specified the default is all rows. The <option>where</> clause + is applied to all tables dumped, so any columns named must be present + on all tables being dumped or <application>pg_dump</application> + will return an error. The phrase <quote>where</quote> need not be used, + since this will be added automatically. This option is ignored if + no data is dumped. <option>-w</> cannot currently be used at the same + time as <option>-o/--oids</>. + </para> + </listitem> + </varlistentry> + + <varlistentry> <term><option>-W</option></term> <term><option>--password</option></term> <listitem> *************** *** 875,880 **** --- 892,908 ---- </para> <para> + You can specify a data sample using <option>-w</>. An example would be to + dump all tables less than 8MB in full, while only a random 10% of rows for + any table 8MB or larger. Note that this may not dump all foreign key data + correctly, so choose your extract carefully for your own database. + + <screen> + <prompt>$</prompt> <userinput>pg_dump -w "ctid < '(1000,1)' or random() > 0.1" mydb > db.sql</userinput> + </screen> + </para> + + <para> To dump all database objects except for tables whose names begin with <literal>ts_</literal>: Index: src/bin/pg_dump/pg_dump.c =================================================================== RCS file: /home/sriggs/pg/REPOSITORY/pgsql/src/bin/pg_dump/pg_dump.c,v retrieving revision 1.497 diff -c -r1.497 pg_dump.c *** src/bin/pg_dump/pg_dump.c 20 Jul 2008 18:43:30 -0000 1.497 --- src/bin/pg_dump/pg_dump.c 25 Jul 2008 08:34:05 -0000 *************** *** 95,100 **** --- 95,102 ---- static SimpleStringList table_exclude_patterns = {NULL, NULL}; static SimpleOidList table_exclude_oids = {NULL, NULL}; + static const char *where_clause = NULL; + /* default, if no "inclusion" switches appear, is to dump everything */ static bool include_everything = true; *************** *** 188,194 **** static void dumpEncoding(Archive *AH); static void dumpStdStrings(Archive *AH); static const char *getAttrName(int attrnum, TableInfo *tblInfo); ! static const char *fmtCopyColumnList(const TableInfo *ti); static void do_sql_command(PGconn *conn, const char *query); static void check_sql_result(PGresult *res, PGconn *conn, const char *query, ExecStatusType expected); --- 190,196 ---- static void dumpEncoding(Archive *AH); static void dumpStdStrings(Archive *AH); static const char *getAttrName(int attrnum, TableInfo *tblInfo); ! static const char *fmtCopyColumnList(const TableInfo *ti, bool with_brackets); static void do_sql_command(PGconn *conn, const char *query); static void check_sql_result(PGresult *res, PGconn *conn, const char *query, ExecStatusType expected); *************** *** 250,255 **** --- 252,258 ---- {"superuser", required_argument, NULL, 'S'}, {"table", required_argument, NULL, 't'}, {"exclude-table", required_argument, NULL, 'T'}, + {"where", required_argument, NULL, 'w'}, {"password", no_argument, NULL, 'W'}, {"username", required_argument, NULL, 'U'}, {"verbose", no_argument, NULL, 'v'}, *************** *** 303,309 **** } } ! while ((c = getopt_long(argc, argv, "abcCdDE:f:F:h:in:N:oOp:RsS:t:T:U:vWxX:Z:", long_options, &optindex)) != -1) { switch (c) --- 306,312 ---- } } ! while ((c = getopt_long(argc, argv, "abcCdDE:f:F:h:in:N:oOp:RsS:t:T:U:vw:WxX:Z:", long_options, &optindex)) != -1) { switch (c) *************** *** 404,409 **** --- 407,416 ---- g_verbose = true; break; + case 'w': + where_clause = optarg; + break; + case 'W': force_password = true; break; *************** *** 483,488 **** --- 490,501 ---- exit(1); } + if (where_clause && oids) + { + write_msg(NULL, "options -w/--where and -o/--oids cannot be used together\n"); + exit(1); + } + /* open the output file */ if (pg_strcasecmp(format, "a") == 0 || pg_strcasecmp(format, "append") == 0) { *************** *** 788,793 **** --- 801,807 ---- " plain text format\n")); printf(_(" -t, --table=TABLE dump the named table(s) only\n")); printf(_(" -T, --exclude-table=TABLE do NOT dump the named table(s)\n")); + printf(_(" -w, --where=SQL dump data only for rows matching where clause\n")); printf(_(" -x, --no-privileges do not dump privileges (grant/revoke)\n")); printf(_(" --disable-dollar-quoting disable dollar quoting, use SQL standard quoting\n")); printf(_(" --disable-triggers disable triggers during data-only restore\n")); *************** *** 1060,1082 **** * cases involving ADD COLUMN and inheritance.) */ if (g_fout->remoteVersion >= 70300) ! column_list = fmtCopyColumnList(tbinfo); else column_list = ""; /* can't select columns in COPY */ ! if (oids && hasoids) { ! appendPQExpBuffer(q, "COPY %s %s WITH OIDS TO stdout;", fmtQualifiedId(tbinfo->dobj.namespace->dobj.name, classname), ! column_list); } else { ! appendPQExpBuffer(q, "COPY %s %s TO stdout;", fmtQualifiedId(tbinfo->dobj.namespace->dobj.name, classname), ! column_list); } res = PQexec(g_conn, q->data); check_sql_result(res, g_conn, q->data, PGRES_COPY_OUT); --- 1074,1099 ---- * cases involving ADD COLUMN and inheritance.) */ if (g_fout->remoteVersion >= 70300) ! column_list = fmtCopyColumnList(tbinfo, (where_clause == NULL)); else column_list = ""; /* can't select columns in COPY */ ! if (where_clause) { ! appendPQExpBuffer(q, "COPY (SELECT %s FROM %s WHERE %s) TO stdout %s;", ! column_list, fmtQualifiedId(tbinfo->dobj.namespace->dobj.name, classname), ! where_clause, ! ((oids && hasoids) ? "WITH OIDS" : "")); } else { ! appendPQExpBuffer(q, "COPY %s %s TO stdout %s;", fmtQualifiedId(tbinfo->dobj.namespace->dobj.name, classname), ! column_list, ! ((oids && hasoids) ? "WITH OIDS" : "")); } res = PQexec(g_conn, q->data); check_sql_result(res, g_conn, q->data, PGRES_COPY_OUT); *************** *** 1331,1337 **** appendPQExpBuffer(copyBuf, "COPY %s ", fmtId(tbinfo->dobj.name)); appendPQExpBuffer(copyBuf, "%s %sFROM stdin;\n", ! fmtCopyColumnList(tbinfo), (tdinfo->oids && tbinfo->hasoids) ? "WITH OIDS " : ""); copyStmt = copyBuf->data; } --- 1348,1354 ---- appendPQExpBuffer(copyBuf, "COPY %s ", fmtId(tbinfo->dobj.name)); appendPQExpBuffer(copyBuf, "%s %sFROM stdin;\n", ! fmtCopyColumnList(tbinfo, true), (tdinfo->oids && tbinfo->hasoids) ? "WITH OIDS " : ""); copyStmt = copyBuf->data; } *************** *** 10492,10498 **** * "", not an invalid "()" column list. */ static const char * ! fmtCopyColumnList(const TableInfo *ti) { static PQExpBuffer q = NULL; int numatts = ti->numatts; --- 10509,10515 ---- * "", not an invalid "()" column list. */ static const char * ! fmtCopyColumnList(const TableInfo *ti, bool with_brackets) { static PQExpBuffer q = NULL; int numatts = ti->numatts; *************** *** 10506,10512 **** else q = createPQExpBuffer(); ! appendPQExpBuffer(q, "("); needComma = false; for (i = 0; i < numatts; i++) { --- 10523,10530 ---- else q = createPQExpBuffer(); ! if (with_brackets) ! appendPQExpBuffer(q, "("); needComma = false; for (i = 0; i < numatts; i++) { *************** *** 10521,10527 **** if (!needComma) return ""; /* no undropped columns */ ! appendPQExpBuffer(q, ")"); return q->data; } --- 10539,10546 ---- if (!needComma) return ""; /* no undropped columns */ ! if (with_brackets) ! appendPQExpBuffer(q, ")"); return q->data; }
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers