On Mon, Sep 9, 2024 at 10:34 PM Jim Jones <jim.jo...@uni-muenster.de> wrote: > > > Hi there > > On 26.08.24 02:00, jian he wrote: > > hi all. > > patch updated. > > simplified the code a lot. > > > > idea is same: > > COPY t_on_error_null FROM STDIN WITH (on_error set_to_null); > > > > If the STDIN number of columns is the same as the target table, then > > InputFunctionCallSafe > > call failure will make that column values be null. > > > > > > If the STDIN number of columns is not the same as the target table, then > > error > > ERROR: missing data for column \"%s\" > > ERROR: extra data after last expected column > > which is status quo. > > I wanted to give it another try, but the patch does not apply ... >
here we are. please check the attached file.
From 3d6b3d8b0393b5bc4950e85c40c69c0da46c8035 Mon Sep 17 00:00:00 2001 From: jian he <jian.universal...@gmail.com> Date: Thu, 12 Sep 2024 17:07:02 +0800 Subject: [PATCH v4 1/1] on_error set_to_null extent "on_error action", introduce new option: on_error set_to_null. Due to current grammar, we cannot use "on_error null", so I choose on_error set_to_null. any data type conversion errors while the COPY FROM process will set that column value to be NULL. this will only work with COPY FROM and non-binary format. However this will respect the not-null constraint, meaning, if you actually converted error to null, but the column has not-null constraint, not-null constraint violation ERROR will be raised. discussion: https://postgr.es/m/CAKFQuwawy1e6YR4S=j+y7pXqg_Dw1WBVrgvf=bp3d1_asfe...@mail.gmail.com --- doc/src/sgml/ref/copy.sgml | 1 + src/backend/commands/copy.c | 4 ++- src/backend/commands/copyfrom.c | 9 ++--- src/backend/commands/copyfromparse.c | 11 +++++++ src/include/commands/copy.h | 1 + src/test/regress/expected/copy2.out | 49 ++++++++++++++++++++++++++++ src/test/regress/sql/copy2.sql | 44 +++++++++++++++++++++++++ 7 files changed, 114 insertions(+), 5 deletions(-) diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml index 1518af8a04..b6bdf45e7e 100644 --- a/doc/src/sgml/ref/copy.sgml +++ b/doc/src/sgml/ref/copy.sgml @@ -394,6 +394,7 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable An <replaceable class="parameter">error_action</replaceable> value of <literal>stop</literal> means fail the command, while <literal>ignore</literal> means discard the input row and continue with the next one. + <literal>set_to_null</literal> means the input value will set to <literal>null</literal> and continue with the next one. The default is <literal>stop</literal>. </para> <para> diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index 3bb579a3a4..05b152a090 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -403,12 +403,14 @@ defGetCopyOnErrorChoice(DefElem *def, ParseState *pstate, bool is_from) parser_errposition(pstate, def->location))); /* - * Allow "stop", or "ignore" values. + * Allow "stop", or "ignore", "set_to_null" values. */ if (pg_strcasecmp(sval, "stop") == 0) return COPY_ON_ERROR_STOP; if (pg_strcasecmp(sval, "ignore") == 0) return COPY_ON_ERROR_IGNORE; + if (pg_strcasecmp(sval, "set_to_null") == 0) + return COPY_ON_ERROR_NULL; ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c index 2d3462913e..1669fac444 100644 --- a/src/backend/commands/copyfrom.c +++ b/src/backend/commands/copyfrom.c @@ -1319,7 +1319,7 @@ CopyFrom(CopyFromState cstate) /* Done, clean up */ error_context_stack = errcallback.previous; - if (cstate->opts.on_error != COPY_ON_ERROR_STOP && + if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE && cstate->num_errors > 0) ereport(NOTICE, errmsg_plural("%llu row was skipped due to data type incompatibility", @@ -1471,10 +1471,11 @@ BeginCopyFrom(ParseState *pstate, cstate->escontext->error_occurred = false; /* - * Currently we only support COPY_ON_ERROR_IGNORE. We'll add other - * options later + * Currently we only support COPY_ON_ERROR_IGNORE, COPY_ON_ERROR_NULL. + * We'll add other options later */ - if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE) + if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE || + cstate->opts.on_error == COPY_ON_ERROR_NULL) cstate->escontext->details_wanted = false; } else diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c index 97a4c387a3..3fe32b76ac 100644 --- a/src/backend/commands/copyfromparse.c +++ b/src/backend/commands/copyfromparse.c @@ -969,6 +969,17 @@ NextCopyFrom(CopyFromState cstate, ExprContext *econtext, { Assert(cstate->opts.on_error != COPY_ON_ERROR_STOP); + if (cstate->opts.on_error == COPY_ON_ERROR_NULL) + { + values[m] = (Datum) 0; + nulls[m] = true; + /* + * set error_occurred to false, so next + * InputFunctionCallSafe call behave sane. + */ + cstate->escontext->error_occurred = false; + continue; + } cstate->num_errors++; if (cstate->opts.log_verbosity == COPY_LOG_VERBOSITY_VERBOSE) diff --git a/src/include/commands/copy.h b/src/include/commands/copy.h index 141fd48dc1..fa87232ed7 100644 --- a/src/include/commands/copy.h +++ b/src/include/commands/copy.h @@ -38,6 +38,7 @@ typedef enum CopyOnErrorChoice { COPY_ON_ERROR_STOP = 0, /* immediately throw errors, default */ COPY_ON_ERROR_IGNORE, /* ignore errors */ + COPY_ON_ERROR_NULL, /* set error field to null */ } CopyOnErrorChoice; /* diff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out index 61a19cdc4c..b92a5771ff 100644 --- a/src/test/regress/expected/copy2.out +++ b/src/test/regress/expected/copy2.out @@ -81,6 +81,10 @@ COPY x from stdin (on_error ignore, on_error ignore); ERROR: conflicting or redundant options LINE 1: COPY x from stdin (on_error ignore, on_error ignore); ^ +COPY x from stdin (on_error set_to_null, on_error set_to_null); +ERROR: conflicting or redundant options +LINE 1: COPY x from stdin (on_error set_to_null, on_error set_to_nul... + ^ COPY x from stdin (log_verbosity default, log_verbosity verbose); ERROR: conflicting or redundant options LINE 1: COPY x from stdin (log_verbosity default, log_verbosity verb... @@ -92,6 +96,8 @@ COPY x to stdin (format BINARY, null 'x'); ERROR: cannot specify NULL in BINARY mode COPY x from stdin (format BINARY, on_error ignore); ERROR: only ON_ERROR STOP is allowed in BINARY mode +COPY x from stdin (format BINARY, on_error set_to_null); +ERROR: only ON_ERROR STOP is allowed in BINARY mode COPY x from stdin (on_error unsupported); ERROR: COPY ON_ERROR "unsupported" not recognized LINE 1: COPY x from stdin (on_error unsupported); @@ -112,6 +118,10 @@ COPY x to stdin (format BINARY, on_error unsupported); ERROR: COPY ON_ERROR cannot be used with COPY TO LINE 1: COPY x to stdin (format BINARY, on_error unsupported); ^ +COPY x to stdin (on_error set_to_null); +ERROR: COPY ON_ERROR cannot be used with COPY TO +LINE 1: COPY x to stdin (on_error set_to_null); + ^ COPY x to stdout (log_verbosity unsupported); ERROR: COPY LOG_VERBOSITY "unsupported" not recognized LINE 1: COPY x to stdout (log_verbosity unsupported); @@ -753,6 +763,24 @@ CONTEXT: COPY check_ign_err NOTICE: skipping row due to data type incompatibility at line 8 for column "k": "a" CONTEXT: COPY check_ign_err NOTICE: 6 rows were skipped due to data type incompatibility +create domain d_int_not_null as int not null check(value > 0); +CREATE TABLE t_on_error_null (a d_int_not_null, c int not null, b int); +--fail +COPY t_on_error_null FROM STDIN WITH (on_error set_to_null); +ERROR: null value in column "c" of relation "t_on_error_null" violates not-null constraint +DETAIL: Failing row contains (11, null, 12). +CONTEXT: COPY t_on_error_null, line 1: "11 a 12" +--ok +COPY t_on_error_null FROM STDIN WITH (on_error set_to_null); +select * from t_on_error_null; + a | c | b +---+----+---- + | 11 | 13 + | 11 | 14 +(2 rows) + +drop table t_on_error_null; +drop domain d_int_not_null; -- tests for on_error option with log_verbosity and null constraint via domain CREATE DOMAIN dcheck_ign_err2 varchar(15) NOT NULL; CREATE TABLE check_ign_err2 (n int, m int[], k int, l dcheck_ign_err2); @@ -789,6 +817,27 @@ CONTEXT: COPY check_ign_err, line 1: "1 {1}" COPY check_ign_err FROM STDIN WITH (on_error ignore); ERROR: extra data after last expected column CONTEXT: COPY check_ign_err, line 1: "1 {1} 3 abc" +truncate check_ign_err; +COPY check_ign_err FROM STDIN WITH (on_error set_to_null); +\pset null NULL +SELECT * FROM check_ign_err; + n | m | k +------+-----+------ + 1 | {1} | NULL + 2 | {2} | 1 + 3 | {3} | 2 + 4 | {4} | NULL + NULL | {5} | NULL +(5 rows) + +--should fail. +COPY check_ign_err FROM STDIN WITH (delimiter ',', on_error set_to_null); +ERROR: missing data for column "k" +CONTEXT: COPY check_ign_err, line 1, column m: "" +--should fail. +COPY check_ign_err FROM STDIN WITH (delimiter ',', on_error set_to_null); +ERROR: extra data after last expected column +CONTEXT: COPY check_ign_err, line 1: "1,{1},1,1" -- clean up DROP TABLE forcetest; DROP TABLE vistest; diff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql index 8b14962194..1144822768 100644 --- a/src/test/regress/sql/copy2.sql +++ b/src/test/regress/sql/copy2.sql @@ -67,12 +67,14 @@ COPY x from stdin (force_null (a), force_null (b)); COPY x from stdin (convert_selectively (a), convert_selectively (b)); COPY x from stdin (encoding 'sql_ascii', encoding 'sql_ascii'); COPY x from stdin (on_error ignore, on_error ignore); +COPY x from stdin (on_error set_to_null, on_error set_to_null); COPY x from stdin (log_verbosity default, log_verbosity verbose); -- incorrect options COPY x to stdin (format BINARY, delimiter ','); COPY x to stdin (format BINARY, null 'x'); COPY x from stdin (format BINARY, on_error ignore); +COPY x from stdin (format BINARY, on_error set_to_null); COPY x from stdin (on_error unsupported); COPY x to stdin (format TEXT, force_quote(a)); COPY x from stdin (format CSV, force_quote(a)); @@ -81,6 +83,7 @@ COPY x to stdin (format CSV, force_not_null(a)); COPY x to stdout (format TEXT, force_null(a)); COPY x to stdin (format CSV, force_null(a)); COPY x to stdin (format BINARY, on_error unsupported); +COPY x to stdin (on_error set_to_null); COPY x to stdout (log_verbosity unsupported); -- too many columns in column list: should fail @@ -526,6 +529,24 @@ a {2} 2 8 {8} 8 \. +create domain d_int_not_null as int not null check(value > 0); +CREATE TABLE t_on_error_null (a d_int_not_null, c int not null, b int); + +--fail +COPY t_on_error_null FROM STDIN WITH (on_error set_to_null); +11 a 12 +\. + +--ok +COPY t_on_error_null FROM STDIN WITH (on_error set_to_null); +-1 11 13 +a 11 14 +\. + +select * from t_on_error_null; +drop table t_on_error_null; +drop domain d_int_not_null; + -- tests for on_error option with log_verbosity and null constraint via domain CREATE DOMAIN dcheck_ign_err2 varchar(15) NOT NULL; CREATE TABLE check_ign_err2 (n int, m int[], k int, l dcheck_ign_err2); @@ -557,6 +578,29 @@ COPY check_ign_err FROM STDIN WITH (on_error ignore); 1 {1} 3 abc \. + +truncate check_ign_err; +COPY check_ign_err FROM STDIN WITH (on_error set_to_null); +1 {1} a +2 {2} 1 +3 {3} 2 +4 {4} b +a {5} c +\. + +\pset null NULL +SELECT * FROM check_ign_err; + +--should fail. +COPY check_ign_err FROM STDIN WITH (delimiter ',', on_error set_to_null); +1, +\. + +--should fail. +COPY check_ign_err FROM STDIN WITH (delimiter ',', on_error set_to_null); +1,{1},1,1 +\. + -- clean up DROP TABLE forcetest; DROP TABLE vistest; base-commit: 00c76cf21c42c17e60e73a87dea0d1b4e234d9da -- 2.34.1