On Mon, Feb 11, 2019 at 10:20 AM David Rowley <david.row...@2ndquadrant.com>
wrote:

> Reviewing pg_dump-rows-per-insert-option-v14.
>
Also, maybe one for Fabien (because he seems keen on keeping the
> --rows-per-insert validation code)
>
> strtol() returns a long. dump_inserts is an int, so on machines where
> sizeof(long) == 8 and sizeof(int) == 4 (most machines, these days) the
> validation is not bulletproof.  This could lead to:
>
> $ pg_dump --rows-per-insert=2147483648
> pg_dump: rows-per-insert must be a positive number
>

fixed


> For me, I was fine with the atoi() code that the other options use,
> but maybe Fabien has a problem with the long vs int?
>


The main issue with atoi() is it does not detect errors and return 0 for
both invalid input and input value 0 but in our case it doesn't case a
problem because it error out for value 0. but for example in compress Level
if invalid input supplied it silently precede as input value 0 is supplied.

regards
Surafel
diff --git a/doc/src/sgml/ref/pg_dump.sgml b/doc/src/sgml/ref/pg_dump.sgml
index 9e0bb93f08..0ab57067a8 100644
--- a/doc/src/sgml/ref/pg_dump.sgml
+++ b/doc/src/sgml/ref/pg_dump.sgml
@@ -661,9 +661,9 @@ PostgreSQL documentation
         ...</literal>).  This will make restoration very slow; it is mainly
         useful for making dumps that can be loaded into
         non-<productname>PostgreSQL</productname> databases.
-        However, since this option generates a separate command for each row,
-        an error in reloading a row causes only that row to be lost rather
-        than the entire table contents.
+        However, since by default the option generates a separate command
+        for each row, an error in reloading a row causes only that row to be
+        lost rather than the entire table contents.
        </para>
       </listitem>
      </varlistentry>
@@ -764,11 +764,10 @@ PostgreSQL documentation
         than <command>COPY</command>).  This will make restoration very slow;
         it is mainly useful for making dumps that can be loaded into
         non-<productname>PostgreSQL</productname> databases.
-        However, since this option generates a separate command for each row,
-        an error in reloading a row causes only that row to be lost rather
-        than the entire table contents.
-        Note that
-        the restore might fail altogether if you have rearranged column order.
+        However, since by default the option generates a separate command
+        for each row, an error in reloading a row causes only that row to be
+        lost rather than the entire table contents.  Note that the restore
+        might fail altogether if you have rearranged column order.
         The <option>--column-inserts</option> option is safe against column
         order changes, though even slower.
        </para>
@@ -914,8 +913,9 @@ PostgreSQL documentation
        <para>
         Add <literal>ON CONFLICT DO NOTHING</literal> to
         <command>INSERT</command> commands.
-        This option is not valid unless <option>--inserts</option> or
-        <option>--column-inserts</option> is also specified.
+        This option is not valid unless <option>--inserts</option>,
+        <option>--column-inserts</option> or
+        <option>--rows-per-insert</option> is also specified.
        </para>
       </listitem>
      </varlistentry>
@@ -938,6 +938,18 @@ PostgreSQL documentation
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--rows-per-insert=<replaceable class="parameter">nrows</replaceable></option></term>
+      <listitem>
+       <para>
+        Dump data as <command>INSERT</command> commands (rather than
+        <command>COPY</command>).  Controls the maximum number of rows per
+        <command>INSERT</command> statement. The value specified must be a
+        number greater than zero.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
        <term><option>--section=<replaceable class="parameter">sectionname</replaceable></option></term>
        <listitem>
diff --git a/src/bin/pg_dump/pg_backup.h b/src/bin/pg_dump/pg_backup.h
index 4a2e122e2d..7ab27391fb 100644
--- a/src/bin/pg_dump/pg_backup.h
+++ b/src/bin/pg_dump/pg_backup.h
@@ -140,10 +140,10 @@ typedef struct _dumpOptions
 	int			dumpSections;	/* bitmask of chosen sections */
 	bool		aclsSkip;
 	const char *lockWaitTimeout;
+	int			dump_inserts;	/* 0 = COPY, otherwise rows per INSERT */
 
 	/* flags for various command-line long options */
 	int			disable_dollar_quoting;
-	int			dump_inserts;
 	int			column_inserts;
 	int			if_exists;
 	int			no_comments;
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index 3a89ad846a..c7403f4e40 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -307,6 +307,8 @@ main(int argc, char **argv)
 	const char *dumpencoding = NULL;
 	const char *dumpsnapshot = NULL;
 	char	   *use_role = NULL;
+	char       *rowPerInsertEndPtr;
+	long			rowPerInsert;
 	int			numWorkers = 1;
 	trivalue	prompt_password = TRI_DEFAULT;
 	int			compressLevel = -1;
@@ -358,7 +360,7 @@ main(int argc, char **argv)
 		{"enable-row-security", no_argument, &dopt.enable_row_security, 1},
 		{"exclude-table-data", required_argument, NULL, 4},
 		{"if-exists", no_argument, &dopt.if_exists, 1},
-		{"inserts", no_argument, &dopt.dump_inserts, 1},
+		{"inserts", no_argument, NULL, 8},
 		{"lock-wait-timeout", required_argument, NULL, 2},
 		{"no-tablespaces", no_argument, &dopt.outputNoTablespaces, 1},
 		{"quote-all-identifiers", no_argument, &quote_all_identifiers, 1},
@@ -377,6 +379,7 @@ main(int argc, char **argv)
 		{"no-subscriptions", no_argument, &dopt.no_subscriptions, 1},
 		{"no-sync", no_argument, NULL, 7},
 		{"on-conflict-do-nothing", no_argument, &dopt.do_nothing, 1},
+		{"rows-per-insert", required_argument, NULL, 9},
 
 		{NULL, 0, NULL, 0}
 	};
@@ -557,6 +560,37 @@ main(int argc, char **argv)
 				dosync = false;
 				break;
 
+			case 8:				/* inserts */
+				/*
+				 * dump_inserts also stores --rows-per-insert, careful not to
+				 * overwrite that.
+				 */
+				if (dopt.dump_inserts == 0)
+					dopt.dump_inserts = DUMP_DEFAULT_ROWS_PER_INSERT;
+				break;
+
+			case 9:			/* rows per insert */
+				errno = 0;
+				rowPerInsert = strtol(optarg, &rowPerInsertEndPtr, 10);
+
+				if (rowPerInsertEndPtr == optarg || *rowPerInsertEndPtr != '\0')
+				{
+					write_msg(NULL, "argument of --rows-per-insert must be a number\n");
+					exit_nicely(1);
+				}
+				if (rowPerInsert > INT_MAX)
+				{
+					write_msg(NULL, "argument of --rows-per-insert exceeds integer range.\n");
+					exit_nicely(1);
+				}
+				if (rowPerInsert <= 0)
+				{
+					write_msg(NULL, "rows-per-insert must be a positive number\n");
+					exit_nicely(1);
+				}
+				dopt.dump_inserts = rowPerInsert;
+				break;
+
 			default:
 				fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
 				exit_nicely(1);
@@ -581,8 +615,8 @@ main(int argc, char **argv)
 	}
 
 	/* --column-inserts implies --inserts */
-	if (dopt.column_inserts)
-		dopt.dump_inserts = 1;
+	if (dopt.column_inserts && dopt.dump_inserts == 0)
+		dopt.dump_inserts = DUMP_DEFAULT_ROWS_PER_INSERT;
 
 	/*
 	 * Binary upgrade mode implies dumping sequence data even in schema-only
@@ -607,8 +641,12 @@ main(int argc, char **argv)
 	if (dopt.if_exists && !dopt.outputClean)
 		exit_horribly(NULL, "option --if-exists requires option -c/--clean\n");
 
-	if (dopt.do_nothing && !(dopt.dump_inserts || dopt.column_inserts))
-		exit_horribly(NULL, "option --on-conflict-do-nothing requires option --inserts or --column-inserts\n");
+	/*
+	 * --inserts are already implied above if --column-inserts or
+	 * --rows-per-insert were specified.
+	 */
+	if (dopt.do_nothing && dopt.dump_inserts == 0)
+		exit_horribly(NULL, "option --on-conflict-do-nothing requires option --inserts , --rows-per-insert or --column-inserts\n");
 
 	/* Identify archive format to emit */
 	archiveFormat = parseArchiveFormat(format, &archiveMode);
@@ -977,6 +1015,7 @@ help(const char *progname)
 	printf(_("  --no-unlogged-table-data     do not dump unlogged table data\n"));
 	printf(_("  --on-conflict-do-nothing     add ON CONFLICT DO NOTHING to INSERT commands\n"));
 	printf(_("  --quote-all-identifiers      quote all identifiers, even if not key words\n"));
+	printf(_("  --rows-per-insert=NROWS      number of row per INSERT command\n"));
 	printf(_("  --section=SECTION            dump named section (pre-data, data, or post-data)\n"));
 	printf(_("  --serializable-deferrable    wait until the dump can run without anomalies\n"));
 	printf(_("  --snapshot=SNAPSHOT          use given snapshot for the dump\n"));
@@ -1886,6 +1925,8 @@ dumpTableData_insert(Archive *fout, void *dcontext)
 	int			tuple;
 	int			nfields;
 	int			field;
+	int			rows_per_statement = dopt->dump_inserts;
+	int			rows_this_statement = 0;
 
 	appendPQExpBuffer(q, "DECLARE _pg_dump_cursor CURSOR FOR "
 					  "SELECT * FROM ONLY %s",
@@ -1900,68 +1941,86 @@ dumpTableData_insert(Archive *fout, void *dcontext)
 		res = ExecuteSqlQuery(fout, "FETCH 100 FROM _pg_dump_cursor",
 							  PGRES_TUPLES_OK);
 		nfields = PQnfields(res);
+
+		/*
+		 * First time through, we build as much of the INSERT statement as
+		 * possible in "insertStmt", which we can then just print for each
+		 * line. If the table happens to have zero columns then this will
+		 * be a complete statement, otherwise it will end in "VALUES " and
+		 * be ready to have the row's column values printed.
+		 */
+		if (insertStmt == NULL)
+		{
+			TableInfo  *targettab;
+
+			insertStmt = createPQExpBuffer();
+
+			/*
+			 * When load-via-partition-root is set, get the root table
+			 * name for the partition table, so that we can reload data
+			 * through the root table.
+			 */
+			if (dopt->load_via_partition_root && tbinfo->ispartition)
+				targettab = getRootTableInfo(tbinfo);
+			else
+				targettab = tbinfo;
+
+			appendPQExpBuffer(insertStmt, "INSERT INTO %s ",
+							  fmtQualifiedDumpable(targettab));
+
+			/* corner case for zero-column table */
+			if (nfields == 0)
+			{
+				appendPQExpBufferStr(insertStmt, "DEFAULT VALUES;\n");
+			}
+			else
+			{
+				/* append the list of column names if required */
+				if (dopt->column_inserts)
+				{
+					appendPQExpBufferChar(insertStmt, '(');
+					for (field = 0; field < nfields; field++)
+					{
+						if (field > 0)
+							appendPQExpBufferStr(insertStmt, ", ");
+						appendPQExpBufferStr(insertStmt,
+											 fmtId(PQfname(res, field)));
+					}
+					appendPQExpBufferStr(insertStmt, ") ");
+				}
+
+				if (tbinfo->needs_override)
+					appendPQExpBufferStr(insertStmt, "OVERRIDING SYSTEM VALUE ");
+
+				appendPQExpBufferStr(insertStmt, "VALUES ");
+			}
+		}
+
 		for (tuple = 0; tuple < PQntuples(res); tuple++)
 		{
+			/* Write the INSERT if not in the middle of a multi-row INSERT. */
+			if (rows_this_statement == 0)
+				archputs(insertStmt->data, fout);
+
+
 			/*
-			 * First time through, we build as much of the INSERT statement as
-			 * possible in "insertStmt", which we can then just print for each
-			 * line. If the table happens to have zero columns then this will
-			 * be a complete statement, otherwise it will end in "VALUES(" and
-			 * be ready to have the row's column values appended.
+			 * If it is zero-column table then we've aleady written the
+			 * complete statement, which will mean we've disobeyed
+			 * --rows-per-insert when it's set greater than 1.  We do support
+			 * a way to make this multi-row with:
+			 * SELECT UNION ALL SELECT UNION ALL ... but that's non-standard
+			 * so likely we should avoid it given that using INSERTs is
+			 * mostly only ever needed for cross-database exports.
 			 */
-			if (insertStmt == NULL)
-			{
-				TableInfo  *targettab;
-
-				insertStmt = createPQExpBuffer();
-
-				/*
-				 * When load-via-partition-root is set, get the root table
-				 * name for the partition table, so that we can reload data
-				 * through the root table.
-				 */
-				if (dopt->load_via_partition_root && tbinfo->ispartition)
-					targettab = getRootTableInfo(tbinfo);
-				else
-					targettab = tbinfo;
-
-				appendPQExpBuffer(insertStmt, "INSERT INTO %s ",
-								  fmtQualifiedDumpable(targettab));
-
-				/* corner case for zero-column table */
-				if (nfields == 0)
-				{
-					appendPQExpBufferStr(insertStmt, "DEFAULT VALUES;\n");
-				}
-				else
-				{
-					/* append the list of column names if required */
-					if (dopt->column_inserts)
-					{
-						appendPQExpBufferChar(insertStmt, '(');
-						for (field = 0; field < nfields; field++)
-						{
-							if (field > 0)
-								appendPQExpBufferStr(insertStmt, ", ");
-							appendPQExpBufferStr(insertStmt,
-												 fmtId(PQfname(res, field)));
-						}
-						appendPQExpBufferStr(insertStmt, ") ");
-					}
-
-					if (tbinfo->needs_override)
-						appendPQExpBufferStr(insertStmt, "OVERRIDING SYSTEM VALUE ");
-
-					appendPQExpBufferStr(insertStmt, "VALUES (");
-				}
-			}
-
-			archputs(insertStmt->data, fout);
-
-			/* if it is zero-column table then we're done */
 			if (nfields == 0)
 				continue;
 
+			if (rows_this_statement > 0)
+				archputs(", (", fout);
+			else
+				archputs("(", fout);
+
+
 			for (field = 0; field < nfields; field++)
 			{
 				if (field > 0)
@@ -2027,10 +2086,27 @@ dumpTableData_insert(Archive *fout, void *dcontext)
 				}
 			}
 
-			if (!dopt->do_nothing)
-				archputs(");\n", fout);
+			rows_this_statement++;
+
+			/*
+			 * If we've put the target number of rows onto this statement then
+			 * we can terminate it now.
+			 */
+			if (rows_this_statement == rows_per_statement)
+			{
+				/* Reset the row counter */
+				rows_this_statement = 0;
+				if (dopt->do_nothing)
+					archputs(") ON CONFLICT DO NOTHING;\n", fout);
+				else
+					archputs(");\n", fout);
+			}
 			else
-				archputs(") ON CONFLICT DO NOTHING;\n", fout);
+			{
+				/* Otherwise, get ready for the next row. */
+				archputs(")", fout);
+			}
+
 		}
 
 		if (PQntuples(res) <= 0)
@@ -2041,6 +2117,15 @@ dumpTableData_insert(Archive *fout, void *dcontext)
 		PQclear(res);
 	}
 
+	/* Terminate any statements that didn't make the row count.*/
+	if (rows_this_statement > 0)
+	{
+		if (dopt->do_nothing)
+			archputs(" ON CONFLICT DO NOTHING;\n", fout);
+		else
+			archputs(";\n", fout);
+	}
+
 	archputs("\n\n", fout);
 
 	ExecuteSqlStatement(fout, "CLOSE _pg_dump_cursor");
diff --git a/src/bin/pg_dump/pg_dump.h b/src/bin/pg_dump/pg_dump.h
index 21d2ab05b0..59ac3d096e 100644
--- a/src/bin/pg_dump/pg_dump.h
+++ b/src/bin/pg_dump/pg_dump.h
@@ -126,6 +126,12 @@ typedef uint32 DumpComponents;	/* a bitmask of dump object components */
 		DUMP_COMPONENT_DATA |\
 		DUMP_COMPONENT_POLICY)
 
+/*
+ * The default number of rows per INSERT statement when
+ * --inserts is specified without --rows-per-insert
+ */
+#define DUMP_DEFAULT_ROWS_PER_INSERT 1
+
 typedef struct _dumpableObject
 {
 	DumpableObjectType objType;
diff --git a/src/bin/pg_dump/t/001_basic.pl b/src/bin/pg_dump/t/001_basic.pl
index a875d540b8..ebd83922dd 100644
--- a/src/bin/pg_dump/t/001_basic.pl
+++ b/src/bin/pg_dump/t/001_basic.pl
@@ -118,8 +118,8 @@ command_fails_like(
 
 command_fails_like(
 	[ 'pg_dump', '--on-conflict-do-nothing' ],
-	qr/\Qpg_dump: option --on-conflict-do-nothing requires option --inserts or --column-inserts\E/,
-	'pg_dump: option --on-conflict-do-nothing requires option --inserts or --column-inserts');
+	qr/\Qpg_dump: option --on-conflict-do-nothing requires option --inserts , --rows-per-insert or --column-inserts\E/,
+	'pg_dump: option --on-conflict-do-nothing requires option --inserts , --rows-per-insert or --column-inserts');
 
 # pg_dumpall command-line argument checks
 command_fails_like(
diff --git a/src/bin/pg_dump/t/002_pg_dump.pl b/src/bin/pg_dump/t/002_pg_dump.pl
index 245fcbf5ce..4cf028de30 100644
--- a/src/bin/pg_dump/t/002_pg_dump.pl
+++ b/src/bin/pg_dump/t/002_pg_dump.pl
@@ -289,6 +289,16 @@ my %pgdump_runs = (
 			"$tempdir/role_parallel",
 		],
 	},
+	rows_per_insert => {
+		dump_cmd => [
+			'pg_dump',
+			'--no-sync',
+			"--file=$tempdir/rows_per_insert.sql", '-a',
+			'--rows-per-insert=3',
+			'--table=dump_test.test_table',
+			'postgres',
+		],
+	},
 	schema_only => {
 		dump_cmd => [
 			'pg_dump',                         '--format=plain',
@@ -1287,6 +1297,13 @@ my %tests = (
 		like => { column_inserts => 1, },
 	},
 
+	'INSERT INTO test_table' => {
+		regexp => qr/^
+			(?:INSERT\ INTO\ dump_test.test_table\ VALUES\ \(\d,\ NULL,\ NULL,\ NULL\),\ \(\d,\ NULL,\ NULL,\ NULL\),\ \(\d,\ NULL,\ NULL,\ NULL\);\n){3}
+			/xm,
+		like => { rows_per_insert => 1, },
+	},
+
 	'INSERT INTO test_second_table' => {
 		regexp => qr/^
 			(?:INSERT\ INTO\ dump_test.test_second_table\ \(col1,\ col2\)

Reply via email to