Attached patch implements WHERE clauses for pg_dump.

This is useful for producing data samples of a database

e.g. pg_dump -w "ctid < '(1000,1)' or random() < 0.1"

and can also be used for taking incremental backups, if data columns
exist to make a partial dump sensible.

e.g. pg_dump -w "last_update_timestamp > ...."

Columns such as this are very common because of optimistic locking
techniques in many databases.

This is designed to be used in conjunction with the TOM utility, and the
forthcoming patch to implement stats hooks. Taken together these
features will allow the ability to take a cut-down database environment
for testing, yet with statistics matching the main production database.

It was easier to write it and then discuss, since I needed to check the
feasibility of the idea before presenting it.

-- 
 Simon Riggs           www.2ndQuadrant.com
 PostgreSQL Training, Services and Support
Index: doc/src/sgml/ref/pg_dump.sgml
===================================================================
RCS file: /home/sriggs/pg/REPOSITORY/pgsql/doc/src/sgml/ref/pg_dump.sgml,v
retrieving revision 1.103
diff -c -r1.103 pg_dump.sgml
*** doc/src/sgml/ref/pg_dump.sgml	20 Jul 2008 18:43:30 -0000	1.103
--- doc/src/sgml/ref/pg_dump.sgml	25 Jul 2008 08:29:25 -0000
***************
*** 674,679 ****
--- 674,696 ----
       </varlistentry>
  
       <varlistentry>
+       <term><option>-w <replaceable>SQL where clause</replaceable></option></term>
+       <term><option>--where=<replaceable class="parameter">SQL where clause</replaceable></option></term>
+       <listitem>
+        <para>
+         Dumps data only for those rows specified. When this parameter is not 
+         specified the default is all rows. The <option>where</> clause
+         is applied to all tables dumped, so any columns named must be present
+         on all tables being dumped or <application>pg_dump</application>
+         will return an error. The phrase <quote>where</quote> need not be used,
+         since this will be added automatically. This option is ignored if
+         no data is dumped. <option>-w</> cannot currently be used at the same
+         time as <option>-o/--oids</>.
+        </para>
+       </listitem>
+      </varlistentry>
+ 
+      <varlistentry>
        <term><option>-W</option></term>
        <term><option>--password</option></term>
        <listitem>
***************
*** 875,880 ****
--- 892,908 ----
    </para>
  
    <para>
+    You can specify a data sample using <option>-w</>. An example would be to
+    dump all tables less than 8MB in full, while only a random 10% of rows for
+    any table 8MB or larger. Note that this may not dump all foreign key data
+    correctly, so choose your extract carefully for your own database.
+ 
+ <screen>
+ <prompt>$</prompt> <userinput>pg_dump -w "ctid < '(1000,1)' or random() > 0.1" mydb &gt; db.sql</userinput>
+ </screen>
+   </para>
+ 
+   <para>
     To dump all database objects except for tables whose names begin with
     <literal>ts_</literal>:
  
Index: src/bin/pg_dump/pg_dump.c
===================================================================
RCS file: /home/sriggs/pg/REPOSITORY/pgsql/src/bin/pg_dump/pg_dump.c,v
retrieving revision 1.497
diff -c -r1.497 pg_dump.c
*** src/bin/pg_dump/pg_dump.c	20 Jul 2008 18:43:30 -0000	1.497
--- src/bin/pg_dump/pg_dump.c	25 Jul 2008 08:34:05 -0000
***************
*** 95,100 ****
--- 95,102 ----
  static SimpleStringList table_exclude_patterns = {NULL, NULL};
  static SimpleOidList table_exclude_oids = {NULL, NULL};
  
+ static	const char *where_clause = NULL;
+ 
  /* default, if no "inclusion" switches appear, is to dump everything */
  static bool include_everything = true;
  
***************
*** 188,194 ****
  static void dumpEncoding(Archive *AH);
  static void dumpStdStrings(Archive *AH);
  static const char *getAttrName(int attrnum, TableInfo *tblInfo);
! static const char *fmtCopyColumnList(const TableInfo *ti);
  static void do_sql_command(PGconn *conn, const char *query);
  static void check_sql_result(PGresult *res, PGconn *conn, const char *query,
  				 ExecStatusType expected);
--- 190,196 ----
  static void dumpEncoding(Archive *AH);
  static void dumpStdStrings(Archive *AH);
  static const char *getAttrName(int attrnum, TableInfo *tblInfo);
! static const char *fmtCopyColumnList(const TableInfo *ti, bool with_brackets);
  static void do_sql_command(PGconn *conn, const char *query);
  static void check_sql_result(PGresult *res, PGconn *conn, const char *query,
  				 ExecStatusType expected);
***************
*** 250,255 ****
--- 252,258 ----
  		{"superuser", required_argument, NULL, 'S'},
  		{"table", required_argument, NULL, 't'},
  		{"exclude-table", required_argument, NULL, 'T'},
+ 		{"where", required_argument, NULL, 'w'},
  		{"password", no_argument, NULL, 'W'},
  		{"username", required_argument, NULL, 'U'},
  		{"verbose", no_argument, NULL, 'v'},
***************
*** 303,309 ****
  		}
  	}
  
! 	while ((c = getopt_long(argc, argv, "abcCdDE:f:F:h:in:N:oOp:RsS:t:T:U:vWxX:Z:",
  							long_options, &optindex)) != -1)
  	{
  		switch (c)
--- 306,312 ----
  		}
  	}
  
! 	while ((c = getopt_long(argc, argv, "abcCdDE:f:F:h:in:N:oOp:RsS:t:T:U:vw:WxX:Z:",
  							long_options, &optindex)) != -1)
  	{
  		switch (c)
***************
*** 404,409 ****
--- 407,416 ----
  				g_verbose = true;
  				break;
  
+ 			case 'w':
+ 				where_clause = optarg;
+ 				break;
+ 
  			case 'W':
  				force_password = true;
  				break;
***************
*** 483,488 ****
--- 490,501 ----
  		exit(1);
  	}
  
+ 	if (where_clause && oids)
+ 	{
+ 		write_msg(NULL, "options -w/--where and -o/--oids cannot be used together\n");
+ 		exit(1);
+ 	}
+ 
  	/* open the output file */
  	if (pg_strcasecmp(format, "a") == 0 || pg_strcasecmp(format, "append") == 0)
  	{
***************
*** 788,793 ****
--- 801,807 ----
  			 "                              plain text format\n"));
  	printf(_("  -t, --table=TABLE           dump the named table(s) only\n"));
  	printf(_("  -T, --exclude-table=TABLE   do NOT dump the named table(s)\n"));
+ 	printf(_("  -w, --where=SQL             dump data only for rows matching where clause\n"));
  	printf(_("  -x, --no-privileges         do not dump privileges (grant/revoke)\n"));
  	printf(_("  --disable-dollar-quoting    disable dollar quoting, use SQL standard quoting\n"));
  	printf(_("  --disable-triggers          disable triggers during data-only restore\n"));
***************
*** 1060,1082 ****
  	 * cases involving ADD COLUMN and inheritance.)
  	 */
  	if (g_fout->remoteVersion >= 70300)
! 		column_list = fmtCopyColumnList(tbinfo);
  	else
  		column_list = "";		/* can't select columns in COPY */
  
! 	if (oids && hasoids)
  	{
! 		appendPQExpBuffer(q, "COPY %s %s WITH OIDS TO stdout;",
  						  fmtQualifiedId(tbinfo->dobj.namespace->dobj.name,
  										 classname),
! 						  column_list);
  	}
  	else
  	{
! 		appendPQExpBuffer(q, "COPY %s %s TO stdout;",
  						  fmtQualifiedId(tbinfo->dobj.namespace->dobj.name,
  										 classname),
! 						  column_list);
  	}
  	res = PQexec(g_conn, q->data);
  	check_sql_result(res, g_conn, q->data, PGRES_COPY_OUT);
--- 1074,1099 ----
  	 * cases involving ADD COLUMN and inheritance.)
  	 */
  	if (g_fout->remoteVersion >= 70300)
! 		column_list = fmtCopyColumnList(tbinfo, (where_clause == NULL));
  	else
  		column_list = "";		/* can't select columns in COPY */
  
! 	if (where_clause)
  	{
! 		appendPQExpBuffer(q, "COPY (SELECT %s FROM %s WHERE %s) TO stdout %s;",
! 						  column_list,
  						  fmtQualifiedId(tbinfo->dobj.namespace->dobj.name,
  										 classname),
! 						  where_clause,
! 						  ((oids && hasoids) ? "WITH OIDS" : ""));
  	}
  	else
  	{
! 		appendPQExpBuffer(q, "COPY %s %s TO stdout %s;",
  						  fmtQualifiedId(tbinfo->dobj.namespace->dobj.name,
  										 classname),
! 						  column_list,
! 						  ((oids && hasoids) ? "WITH OIDS" : ""));
  	}
  	res = PQexec(g_conn, q->data);
  	check_sql_result(res, g_conn, q->data, PGRES_COPY_OUT);
***************
*** 1331,1337 ****
  		appendPQExpBuffer(copyBuf, "COPY %s ",
  						  fmtId(tbinfo->dobj.name));
  		appendPQExpBuffer(copyBuf, "%s %sFROM stdin;\n",
! 						  fmtCopyColumnList(tbinfo),
  					  (tdinfo->oids && tbinfo->hasoids) ? "WITH OIDS " : "");
  		copyStmt = copyBuf->data;
  	}
--- 1348,1354 ----
  		appendPQExpBuffer(copyBuf, "COPY %s ",
  						  fmtId(tbinfo->dobj.name));
  		appendPQExpBuffer(copyBuf, "%s %sFROM stdin;\n",
! 						  fmtCopyColumnList(tbinfo, true),
  					  (tdinfo->oids && tbinfo->hasoids) ? "WITH OIDS " : "");
  		copyStmt = copyBuf->data;
  	}
***************
*** 10492,10498 ****
   * "", not an invalid "()" column list.
   */
  static const char *
! fmtCopyColumnList(const TableInfo *ti)
  {
  	static PQExpBuffer q = NULL;
  	int			numatts = ti->numatts;
--- 10509,10515 ----
   * "", not an invalid "()" column list.
   */
  static const char *
! fmtCopyColumnList(const TableInfo *ti, bool with_brackets)
  {
  	static PQExpBuffer q = NULL;
  	int			numatts = ti->numatts;
***************
*** 10506,10512 ****
  	else
  		q = createPQExpBuffer();
  
! 	appendPQExpBuffer(q, "(");
  	needComma = false;
  	for (i = 0; i < numatts; i++)
  	{
--- 10523,10530 ----
  	else
  		q = createPQExpBuffer();
  
! 	if (with_brackets)
! 		appendPQExpBuffer(q, "(");
  	needComma = false;
  	for (i = 0; i < numatts; i++)
  	{
***************
*** 10521,10527 ****
  	if (!needComma)
  		return "";				/* no undropped columns */
  
! 	appendPQExpBuffer(q, ")");
  	return q->data;
  }
  
--- 10539,10546 ----
  	if (!needComma)
  		return "";				/* no undropped columns */
  
! 	if (with_brackets)
! 		appendPQExpBuffer(q, ")");
  	return q->data;
  }
  
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to