Hi, On Sun, Jun 12, 2022 at 09:36:13AM -0400, Andrew Dunstan wrote: > > On 2022-06-07 Tu 11:47, Julien Rouhaud wrote: > > > > First, probably nitpicking, the HEADER MATCH is allowed for COPY TO, is that > > expected? The documentation isn't really explicit about it, but there's > > nothing to match when exporting data it's a bit surprising. I'm not > > opposed to > > have HEADER MATCH means HEADER ON for COPY TO, as as-is one can easily reuse > > the commands history, but maybe it should be clearly documented? > > > I think it makes more sense to have a sanity check to prevent HEADER > MATCH with COPY TO.
I'm fine with it. I added such a check and mentioned it in the documentation. > > Then, apparently HEADER MATCH doesn't let you do sanity checks against a > > custom > > column list. This one looks like a clear oversight, as something like that > > should be entirely valid IMHO: > > > > CREATE TABLE tbl(col1 int, col2 int); > > COPY tbl (col2, col1) TO '/path/to/file' WITH (HEADER MATCH); > > COPY tbl (col2, col1) FROM '/path/to/file' WITH (HEADER MATCH); > > > > but right now it errors out with: > > > > ERROR: column name mismatch in header line field 1: got "col1", expected > > "col2" > > > > Note that the error message is bogus if you specify attributes in a > > different order from the relation, as the code is mixing access to the tuple > > desc and access to the raw fields with the same offset. > > [...] > I think it should, but a temporary alternative would be to forbid HEADER > MATCH with explicit column lists until we can make it work right. I think it would still be problematic if the target table has dropped columns. Fortunately, as I initially thought the problem is only due to a thinko in the original commit which used a wrong variable for the raw_fields offset. Once fixed (attached v1) I didn't see any other problem in the rest of the logic and all the added regression tests work as expected.
>From 2f49e9feb2827856c8a0e3098500109ddd1962c9 Mon Sep 17 00:00:00 2001 From: Julien Rouhaud <julien.rouh...@free.fr> Date: Mon, 13 Jun 2022 09:49:23 +0800 Subject: [PATCH v1] Fix processing of header match option for COPY. Thinko in 072132f04en which used the attnum offset to access the raw_fields array, leading to incorrect results of crash. Use the correct variable, and add some regression tests to cover a bit more scenario for the HEADER MATCH option. While at it, disallow HEADER MATCH in COPY TO as there is no validation that can be done in that case. Author: Julien Rouhaud Discussion: https://postgr.es/m/20220607154744.vvmitnqhyxrne5ms@jrouhaud --- doc/src/sgml/ref/copy.sgml | 2 ++ src/backend/commands/copy.c | 12 ++++++++++-- src/backend/commands/copyfromparse.c | 5 +++-- src/test/regress/expected/copy.out | 21 ++++++++++++++++++++- src/test/regress/sql/copy.sql | 24 ++++++++++++++++++++---- 5 files changed, 55 insertions(+), 9 deletions(-) diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml index 40af423ccf..8aae711b3b 100644 --- a/doc/src/sgml/ref/copy.sgml +++ b/doc/src/sgml/ref/copy.sgml @@ -282,6 +282,8 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable of the columns in the header line must match the actual column names of the table, otherwise an error is raised. This option is not allowed when using <literal>binary</literal> format. + The <literal>MATCH</literal> option is only valid for <command>COPY + FROM</command> commands. </para> </listitem> </varlistentry> diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index f448d39c7e..e596bebb0b 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -318,7 +318,7 @@ DoCopy(ParseState *pstate, const CopyStmt *stmt, * defGetBoolean() but also accepts the special value "match". */ static CopyHeaderChoice -defGetCopyHeaderChoice(DefElem *def) +defGetCopyHeaderChoice(DefElem *def, bool is_from) { /* * If no parameter given, assume "true" is meant. @@ -360,7 +360,15 @@ defGetCopyHeaderChoice(DefElem *def) if (pg_strcasecmp(sval, "off") == 0) return COPY_HEADER_FALSE; if (pg_strcasecmp(sval, "match") == 0) + { + /* match is only valid for COPY FROM */ + if (!is_from) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("%s match is only valid for COPY FROM", + def->defname))); return COPY_HEADER_MATCH; + } } break; } @@ -452,7 +460,7 @@ ProcessCopyOptions(ParseState *pstate, if (header_specified) errorConflictingDefElem(defel, pstate); header_specified = true; - opts_out->header_line = defGetCopyHeaderChoice(defel); + opts_out->header_line = defGetCopyHeaderChoice(defel, is_from); } else if (strcmp(defel->defname, "quote") == 0) { diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c index e06534943f..57813b3458 100644 --- a/src/backend/commands/copyfromparse.c +++ b/src/backend/commands/copyfromparse.c @@ -789,11 +789,12 @@ NextCopyFromRawFields(CopyFromState cstate, char ***fields, int *nfields) foreach(cur, cstate->attnumlist) { int attnum = lfirst_int(cur); - char *colName = cstate->raw_fields[attnum - 1]; + char *colName; Form_pg_attribute attr = TupleDescAttr(tupDesc, attnum - 1); - fldnum++; + Assert(fldnum < cstate->max_fields); + colName = cstate->raw_fields[fldnum++]; if (colName == NULL) ereport(ERROR, (errcode(ERRCODE_BAD_COPY_FILE_FORMAT), diff --git a/src/test/regress/expected/copy.out b/src/test/regress/expected/copy.out index e8d6b4fc13..e9d1ec348a 100644 --- a/src/test/regress/expected/copy.out +++ b/src/test/regress/expected/copy.out @@ -182,9 +182,21 @@ create table header_copytest ( b int, c text ); +-- Make sure it works with with dropped columns +alter table header_copytest drop column c; +alter table header_copytest add column c text; +copy header_copytest to stdout with (header match); +ERROR: header match is only valid for COPY FROM copy header_copytest from stdin with (header wrong_choice); ERROR: header requires a Boolean value or "match" +-- works copy header_copytest from stdin with (header match); +copy header_copytest (c, a, b) from stdin with (header match); +copy header_copytest from stdin with (header match, format csv); +-- the rest errors out +copy header_copytest (c, b, a) from stdin with (header match); +ERROR: column name mismatch in header line field 1: got "a", expected "c" +CONTEXT: COPY header_copytest, line 1: "a b c" copy header_copytest from stdin with (header match); ERROR: column name mismatch in header line field 3: got null value ("\N"), expected "c" CONTEXT: COPY header_copytest, line 1: "a b \N" @@ -197,5 +209,12 @@ CONTEXT: COPY header_copytest, line 1: "a b c d" copy header_copytest from stdin with (header match); ERROR: column name mismatch in header line field 3: got "d", expected "c" CONTEXT: COPY header_copytest, line 1: "a b d" -copy header_copytest from stdin with (header match, format csv); +SELECT * FROM header_copytest ORDER BY a; + a | b | c +---+---+----- + 1 | 2 | foo + 3 | 4 | bar + 5 | 6 | baz +(3 rows) + drop table header_copytest; diff --git a/src/test/regress/sql/copy.sql b/src/test/regress/sql/copy.sql index d72d226f34..0b15ed2653 100644 --- a/src/test/regress/sql/copy.sql +++ b/src/test/regress/sql/copy.sql @@ -204,11 +204,29 @@ create table header_copytest ( b int, c text ); +-- Make sure it works with with dropped columns +alter table header_copytest drop column c; +alter table header_copytest add column c text; +copy header_copytest to stdout with (header match); copy header_copytest from stdin with (header wrong_choice); +-- works copy header_copytest from stdin with (header match); a b c 1 2 foo \. +copy header_copytest (c, a, b) from stdin with (header match); +c a b +bar 3 4 +\. +copy header_copytest from stdin with (header match, format csv); +a,b,c +5,6,baz +\. +-- the rest errors out +copy header_copytest (c, b, a) from stdin with (header match); +a b c +1 2 foo +\. copy header_copytest from stdin with (header match); a b \N 1 2 foo @@ -225,8 +243,6 @@ copy header_copytest from stdin with (header match); a b d 1 2 foo \. -copy header_copytest from stdin with (header match, format csv); -a,b,c -1,2,foo -\. + +SELECT * FROM header_copytest ORDER BY a; drop table header_copytest; -- 2.33.1