Hi, here's a new version of the patch that should apply cleanly. I'll monitor the status on http://cfbot.cputube.org/
Rémi --- contrib/file_fdw/input/file_fdw.source | 7 +- contrib/file_fdw/output/file_fdw.source | 13 ++-- doc/src/sgml/ref/copy.sgml | 9 ++- src/backend/commands/copy.c | 93 ++++++++++++++++++++++--- src/test/regress/input/copy.source | 71 ++++++++++++++----- src/test/regress/output/copy.source | 58 ++++++++++----- 6 files changed, 202 insertions(+), 49 deletions(-)
diff --git a/contrib/file_fdw/input/file_fdw.source b/contrib/file_fdw/input/file_fdw.source index 45b728eeb3..7a3983c785 100644 --- a/contrib/file_fdw/input/file_fdw.source +++ b/contrib/file_fdw/input/file_fdw.source @@ -37,7 +37,6 @@ CREATE USER MAPPING FOR regress_no_priv_user SERVER file_server; -- validator tests CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'xml'); -- ERROR -CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'text', header 'true'); -- ERROR CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'text', quote ':'); -- ERROR CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'text', escape ':'); -- ERROR CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'binary', header 'true'); -- ERROR @@ -80,6 +79,12 @@ CREATE FOREIGN TABLE agg_bad ( OPTIONS (format 'csv', filename '@abs_srcdir@/data/agg.bad', header 'true', delimiter ';', quote '@', escape '"', null ''); ALTER FOREIGN TABLE agg_bad ADD CHECK (a >= 0); +-- test header matching +CREATE FOREIGN TABLE header_match ("1" int, foo text) SERVER file_server +OPTIONS (format 'csv', filename '@abs_srcdir@/data/list1.csv', delimiter ',', header 'match'); +CREATE FOREIGN TABLE header_dont_match (a int, foo text) SERVER file_server +OPTIONS (format 'csv', filename '@abs_srcdir@/data/list1.csv', delimiter ',', header 'match'); -- ERROR + -- per-column options tests CREATE FOREIGN TABLE text_csv ( word1 text OPTIONS (force_not_null 'true'), diff --git a/contrib/file_fdw/output/file_fdw.source b/contrib/file_fdw/output/file_fdw.source index 52b4d5f1df..d76a3dc6f8 100644 --- a/contrib/file_fdw/output/file_fdw.source +++ b/contrib/file_fdw/output/file_fdw.source @@ -33,14 +33,12 @@ CREATE USER MAPPING FOR regress_no_priv_user SERVER file_server; -- validator tests CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'xml'); -- ERROR ERROR: COPY format "xml" not recognized -CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'text', header 'true'); -- ERROR -ERROR: COPY HEADER available only in CSV mode CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'text', quote ':'); -- ERROR ERROR: COPY quote available only in CSV mode CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'text', escape ':'); -- ERROR ERROR: COPY escape available only in CSV mode CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'binary', header 'true'); -- ERROR -ERROR: COPY HEADER available only in CSV mode +ERROR: COPY HEADER available only in CSV and text mode CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'binary', quote ':'); -- ERROR ERROR: COPY quote available only in CSV mode CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'binary', escape ':'); -- ERROR @@ -95,6 +93,11 @@ CREATE FOREIGN TABLE agg_bad ( ) SERVER file_server OPTIONS (format 'csv', filename '@abs_srcdir@/data/agg.bad', header 'true', delimiter ';', quote '@', escape '"', null ''); ALTER FOREIGN TABLE agg_bad ADD CHECK (a >= 0); +-- test header matching +CREATE FOREIGN TABLE header_match ("1" int, foo text) SERVER file_server +OPTIONS (format 'csv', filename '/Users/remi/src/postgresql/contrib/file_fdw/data/list1.csv', delimiter ',', header 'match'); +CREATE FOREIGN TABLE header_dont_match (a int, foo text) SERVER file_server +OPTIONS (format 'csv', filename '/Users/remi/src/postgresql/contrib/file_fdw/data/list1.csv', delimiter ',', header 'match'); -- ERROR -- per-column options tests CREATE FOREIGN TABLE text_csv ( word1 text OPTIONS (force_not_null 'true'), @@ -441,12 +444,14 @@ SET ROLE regress_file_fdw_superuser; -- cleanup RESET ROLE; DROP EXTENSION file_fdw CASCADE; -NOTICE: drop cascades to 7 other objects +NOTICE: drop cascades to 9 other objects DETAIL: drop cascades to server file_server drop cascades to user mapping for regress_file_fdw_superuser on server file_server drop cascades to user mapping for regress_no_priv_user on server file_server drop cascades to foreign table agg_text drop cascades to foreign table agg_csv drop cascades to foreign table agg_bad +drop cascades to foreign table header_match +drop cascades to foreign table header_dont_match drop cascades to foreign table text_csv DROP ROLE regress_file_fdw_superuser, regress_file_fdw_user, regress_no_priv_user; diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml index 18189abc6c..286bf929dc 100644 --- a/doc/src/sgml/ref/copy.sgml +++ b/doc/src/sgml/ref/copy.sgml @@ -36,7 +36,7 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable FREEZE [ <replaceable class="parameter">boolean</replaceable> ] DELIMITER '<replaceable class="parameter">delimiter_character</replaceable>' NULL '<replaceable class="parameter">null_string</replaceable>' - HEADER [ <replaceable class="parameter">boolean</replaceable> ] + HEADER { <literal>match</literal> | <literal>true</literal> | <literal>false</literal> } QUOTE '<replaceable class="parameter">quote_character</replaceable>' ESCAPE '<replaceable class="parameter">escape_character</replaceable>' FORCE_QUOTE { ( <replaceable class="parameter">column_name</replaceable> [, ...] ) | * } @@ -268,8 +268,11 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable <para> Specifies that the file contains a header line with the names of each column in the file. On output, the first line contains the column - names from the table, and on input, the first line is ignored. - This option is allowed only when using <literal>CSV</literal> format. + names from the table, and on input, the first line is required to match + the column names if set to <literal>match</literal> or discarded when set + to <literal>true</literal>. + This option is allowed only when using <literal>CSV</literal> or + <literal>text</literal> format. </para> </listitem> </varlistentry> diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index 3e199bdfd0..b668f123ad 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -95,6 +95,16 @@ typedef enum CopyInsertMethod CIM_MULTI_CONDITIONAL /* use table_multi_insert only if valid */ } CopyInsertMethod; +/* + * Represents whether the head must be absent, present or present and match. + */ +typedef enum CopyHeader +{ + COPY_HEADER_ABSENT, + COPY_HEADER_PRESENT, + COPY_HEADER_MATCH +} CopyHeader; + /* * This struct contains all the state variables used throughout a COPY * operation. For simplicity, we use the same struct for all variants of COPY, @@ -136,7 +146,7 @@ typedef struct CopyStateData bool binary; /* binary format? */ bool freeze; /* freeze rows on loading? */ bool csv_mode; /* Comma Separated Value format? */ - bool header_line; /* CSV header line? */ + CopyHeader header_line; /* CSV or text header line? */ char *null_print; /* NULL marker string (server encoding!) */ int null_print_len; /* length of same */ char *null_print_client; /* same converted to file encoding */ @@ -1179,7 +1189,28 @@ ProcessCopyOptions(ParseState *pstate, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("conflicting or redundant options"), parser_errposition(pstate, defel->location))); - cstate->header_line = defGetBoolean(defel); + + PG_TRY(); + { + if (defGetBoolean(defel)) + cstate->header_line = COPY_HEADER_PRESENT; + else + cstate->header_line = COPY_HEADER_ABSENT; + } + PG_CATCH(); + { + if (!cstate->is_copy_from) + PG_RE_THROW(); + + char *sval = defGetString(defel); + if (pg_strcasecmp(sval, "match") == 0) + cstate->header_line = COPY_HEADER_MATCH; + else + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("header requires a boolean or \"match\""))); + } + PG_END_TRY(); } else if (strcmp(defel->defname, "quote") == 0) { @@ -1360,10 +1391,10 @@ ProcessCopyOptions(ParseState *pstate, errmsg("COPY delimiter cannot be \"%s\"", cstate->delim))); /* Check header */ - if (!cstate->csv_mode && cstate->header_line) + if (cstate->binary && cstate->header_line) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("COPY HEADER available only in CSV mode"))); + errmsg("COPY HEADER available only in CSV and text mode"))); /* Check quote */ if (!cstate->csv_mode && cstate->quote != NULL) @@ -2096,8 +2127,11 @@ CopyTo(CopyState cstate) colname = NameStr(TupleDescAttr(tupDesc, attnum - 1)->attname); - CopyAttributeOutCSV(cstate, colname, false, - list_length(cstate->attnumlist) == 1); + if (cstate->csv_mode) + CopyAttributeOutCSV(cstate, colname, false, + list_length(cstate->attnumlist) == 1); + else + CopyAttributeOutText(cstate, colname); } CopySendEndOfRow(cstate); @@ -3583,12 +3617,53 @@ NextCopyFromRawFields(CopyState cstate, char ***fields, int *nfields) /* only available for text or csv input */ Assert(!cstate->binary); - /* on input just throw the header line away */ + /* on input check that the header line is correct if needed */ if (cstate->cur_lineno == 0 && cstate->header_line) { + ListCell *cur; + TupleDesc tupDesc; + + tupDesc = RelationGetDescr(cstate->rel); + cstate->cur_lineno++; - if (CopyReadLine(cstate)) - return false; /* done */ + done = CopyReadLine(cstate); + + if (cstate->header_line == COPY_HEADER_MATCH) + { + if (cstate->csv_mode) + fldct = CopyReadAttributesCSV(cstate); + else + fldct = CopyReadAttributesText(cstate); + + if (fldct < list_length(cstate->attnumlist)) + ereport(ERROR, + (errcode(ERRCODE_BAD_COPY_FILE_FORMAT), + errmsg("missing header"))); + else if (fldct > list_length(cstate->attnumlist)) + ereport(ERROR, + (errcode(ERRCODE_BAD_COPY_FILE_FORMAT), + errmsg("extra data after last expected header"))); + + foreach(cur, cstate->attnumlist) + { + int attnum = lfirst_int(cur); + char *colName = cstate->raw_fields[attnum - 1]; + Form_pg_attribute attr = TupleDescAttr(tupDesc, attnum - 1); + + if (colName == NULL) + colName = cstate->null_print; + + if (namestrcmp(&attr->attname, colName) != 0) { + ereport(ERROR, + (errcode(ERRCODE_BAD_COPY_FILE_FORMAT), + errmsg("wrong header for column \"%s\": got \"%s\"", + NameStr(attr->attname), colName))); + } + } + } + + if (done) + return false; } cstate->cur_lineno++; diff --git a/src/test/regress/input/copy.source b/src/test/regress/input/copy.source index a1d529ad36..dc7341529f 100644 --- a/src/test/regress/input/copy.source +++ b/src/test/regress/input/copy.source @@ -87,52 +87,66 @@ ANALYZE bt_f8_heap; ANALYZE array_op_test; ANALYZE array_index_op_test; +-- test header line feature + +create temp table copytest ( + c1 int, + "col with tabulation: " text); + +copy copytest from stdin (header); +this is just a line full of junk that would error out if parsed +1 a +2 b +\. + +copy copytest to stdout (header); + --- test copying in CSV mode with various styles --- of embedded line ending characters -create temp table copytest ( +create temp table copytest2 ( style text, test text, filler int); -insert into copytest values('DOS',E'abc\r\ndef',1); -insert into copytest values('Unix',E'abc\ndef',2); -insert into copytest values('Mac',E'abc\rdef',3); -insert into copytest values(E'esc\\ape',E'a\\r\\\r\\\n\\nb',4); +insert into copytest2 values('DOS',E'abc\r\ndef',1); +insert into copytest2 values('Unix',E'abc\ndef',2); +insert into copytest2 values('Mac',E'abc\rdef',3); +insert into copytest2 values(E'esc\\ape',E'a\\r\\\r\\\n\\nb',4); -copy copytest to '@abs_builddir@/results/copytest.csv' csv; +copy copytest2 to '@abs_builddir@/results/copytest.csv' csv; -create temp table copytest2 (like copytest); +create temp table copytest3 (like copytest2); -copy copytest2 from '@abs_builddir@/results/copytest.csv' csv; +copy copytest3 from '@abs_builddir@/results/copytest.csv' csv; -select * from copytest except select * from copytest2; +select * from copytest2 except select * from copytest3; -truncate copytest2; +truncate copytest3; --- same test but with an escape char different from quote char -copy copytest to '@abs_builddir@/results/copytest.csv' csv quote '''' escape E'\\'; +copy copytest2 to '@abs_builddir@/results/copytest.csv' csv quote '''' escape E'\\'; -copy copytest2 from '@abs_builddir@/results/copytest.csv' csv quote '''' escape E'\\'; +copy copytest3 from '@abs_builddir@/results/copytest.csv' csv quote '''' escape E'\\'; -select * from copytest except select * from copytest2; +select * from copytest2 except select * from copytest3; -- test header line feature -create temp table copytest3 ( +create temp table copytest4 ( c1 int, "col with , comma" text, "col with "" quote" int); -copy copytest3 from stdin csv header; +copy copytest4 from stdin csv header; this is just a line full of junk that would error out if parsed 1,a,1 2,b,2 \. -copy copytest3 to stdout csv header; +copy copytest4 to stdout csv header; -- test copy from with a partitioned table create table parted_copytest ( @@ -201,3 +215,28 @@ select * from parted_copytest where b = 1; select * from parted_copytest where b = 2; drop table parted_copytest; + +-- Test header matching feature +create table header_copytest ( + a int, + b int, + c text +); +copy header_copytest from stdin with (header wrong_choice); +copy header_copytest from stdin with (header match); +a b c +1 2 foo +\. +copy header_copytest from stdin with (header match); +a b +1 2 +\. +copy header_copytest from stdin with (header match); +a b c d +1 2 foo bar +\. +copy header_copytest from stdin with (header match, format csv); +a,b,c +1,2,foo +\. +drop table header_copytest; diff --git a/src/test/regress/output/copy.source b/src/test/regress/output/copy.source index 938d3551da..c50a2f092c 100644 --- a/src/test/regress/output/copy.source +++ b/src/test/regress/output/copy.source @@ -58,40 +58,49 @@ ANALYZE bt_txt_heap; ANALYZE bt_f8_heap; ANALYZE array_op_test; ANALYZE array_index_op_test; +-- test header line feature +create temp table copytest ( + c1 int, + "col with tabulation: " text); +copy copytest from stdin (header); +copy copytest to stdout (header); +c1 col with tabulation: \t +1 a +2 b --- test copying in CSV mode with various styles --- of embedded line ending characters -create temp table copytest ( +create temp table copytest2 ( style text, test text, filler int); -insert into copytest values('DOS',E'abc\r\ndef',1); -insert into copytest values('Unix',E'abc\ndef',2); -insert into copytest values('Mac',E'abc\rdef',3); -insert into copytest values(E'esc\\ape',E'a\\r\\\r\\\n\\nb',4); -copy copytest to '@abs_builddir@/results/copytest.csv' csv; -create temp table copytest2 (like copytest); -copy copytest2 from '@abs_builddir@/results/copytest.csv' csv; -select * from copytest except select * from copytest2; +insert into copytest2 values('DOS',E'abc\r\ndef',1); +insert into copytest2 values('Unix',E'abc\ndef',2); +insert into copytest2 values('Mac',E'abc\rdef',3); +insert into copytest2 values(E'esc\\ape',E'a\\r\\\r\\\n\\nb',4); +copy copytest2 to '@abs_builddir@/results/copytest.csv' csv; +create temp table copytest3 (like copytest2); +copy copytest3 from '@abs_builddir@/results/copytest.csv' csv; +select * from copytest2 except select * from copytest3; style | test | filler -------+------+-------- (0 rows) -truncate copytest2; +truncate copytest3; --- same test but with an escape char different from quote char -copy copytest to '@abs_builddir@/results/copytest.csv' csv quote '''' escape E'\\'; -copy copytest2 from '@abs_builddir@/results/copytest.csv' csv quote '''' escape E'\\'; -select * from copytest except select * from copytest2; +copy copytest2 to '@abs_builddir@/results/copytest.csv' csv quote '''' escape E'\\'; +copy copytest3 from '@abs_builddir@/results/copytest.csv' csv quote '''' escape E'\\'; +select * from copytest2 except select * from copytest3; style | test | filler -------+------+-------- (0 rows) -- test header line feature -create temp table copytest3 ( +create temp table copytest4 ( c1 int, "col with , comma" text, "col with "" quote" int); -copy copytest3 from stdin csv header; -copy copytest3 to stdout csv header; +copy copytest4 from stdin csv header; +copy copytest4 to stdout csv header; c1,"col with , comma","col with "" quote" 1,a,1 2,b,2 @@ -165,3 +174,20 @@ select * from parted_copytest where b = 2; (1 row) drop table parted_copytest; +-- Test header matching feature +create table header_copytest ( + a int, + b int, + c text +); +copy header_copytest from stdin with (header wrong_choice); +ERROR: header requires a boolean or "match" +copy header_copytest from stdin with (header match); +copy header_copytest from stdin with (header match); +ERROR: missing header +CONTEXT: COPY header_copytest, line 1: "a b" +copy header_copytest from stdin with (header match); +ERROR: extra data after last expected header +CONTEXT: COPY header_copytest, line 1: "a b c d" +copy header_copytest from stdin with (header match, format csv); +drop table header_copytest;