> Since v5 patch failed applying anymore, updated the patch. Thank you for updating the patch . I made a little review on it where corrected some formatting.
> - COPY with a datatype error that can't be handled as a soft error > > I didn't know proper way to test this, but I've found data type widget's > input function widget_in() defined to occur hard-error in regress.c, > attached patch added a test using it. > This test seems to be weird a bit, because of the "widget" type. The hard error is thrown by the previous test with missing data. Also it'll be interesting for me to list all cases when a hard error can be thrown. Regards, Damir Belyalov Postgres Professional
From 0e1193e00bb5ee810a015a2baaf7c79e395a54c7 Mon Sep 17 00:00:00 2001 From: Damir Belyalov <d.belya...@postgrespro.ru> Date: Fri, 15 Sep 2023 11:14:57 +0300 Subject: [PATCH] ignore errors --- doc/src/sgml/ref/copy.sgml | 13 +++++++++ src/backend/commands/copy.c | 13 +++++++++ src/backend/commands/copyfrom.c | 37 ++++++++++++++++++++++++ src/backend/commands/copyfromparse.c | 20 ++++++++++--- src/bin/psql/tab-complete.c | 3 +- src/include/commands/copy.h | 1 + src/include/commands/copyfrom_internal.h | 3 ++ src/test/regress/expected/copy2.out | 28 ++++++++++++++++++ src/test/regress/sql/copy2.sql | 26 +++++++++++++++++ 9 files changed, 139 insertions(+), 5 deletions(-) diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml index 4d614a0225..d5cdbb4025 100644 --- a/doc/src/sgml/ref/copy.sgml +++ b/doc/src/sgml/ref/copy.sgml @@ -43,6 +43,7 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable FORCE_QUOTE { ( <replaceable class="parameter">column_name</replaceable> [, ...] ) | * } FORCE_NOT_NULL ( <replaceable class="parameter">column_name</replaceable> [, ...] ) FORCE_NULL ( <replaceable class="parameter">column_name</replaceable> [, ...] ) + IGNORE_DATATYPE_ERRORS [ <replaceable class="parameter">boolean</replaceable> ] ENCODING '<replaceable class="parameter">encoding_name</replaceable>' </synopsis> </refsynopsisdiv> @@ -370,6 +371,18 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable </listitem> </varlistentry> + <varlistentry> + <term><literal>IGNORE_DATATYPE_ERRORS</literal></term> + <listitem> + <para> + Drops rows that contain malformed data while copying. These are rows + with columns where the data type's input-function raises an error. + This option is not allowed when using binary format. Note that this + is only supported in current <command>COPY</command> syntax. + </para> + </listitem> + </varlistentry> + <varlistentry> <term><literal>ENCODING</literal></term> <listitem> diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index f14fae3308..beb73f5357 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -419,6 +419,7 @@ ProcessCopyOptions(ParseState *pstate, bool format_specified = false; bool freeze_specified = false; bool header_specified = false; + bool ignore_datatype_errors_specified = false; ListCell *option; /* Support external use for option sanity checking */ @@ -458,6 +459,13 @@ ProcessCopyOptions(ParseState *pstate, freeze_specified = true; opts_out->freeze = defGetBoolean(defel); } + else if (strcmp(defel->defname, "ignore_datatype_errors") == 0) + { + if (ignore_datatype_errors_specified) + errorConflictingDefElem(defel, pstate); + ignore_datatype_errors_specified = true; + opts_out->ignore_datatype_errors = defGetBoolean(defel); + } else if (strcmp(defel->defname, "delimiter") == 0) { if (opts_out->delim) @@ -594,6 +602,11 @@ ProcessCopyOptions(ParseState *pstate, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("cannot specify DEFAULT in BINARY mode"))); + if (opts_out->binary && opts_out->ignore_datatype_errors) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("cannot specify IGNORE_DATATYPE_ERRORS in BINARY mode"))); + /* Set defaults for omitted options */ if (!opts_out->delim) opts_out->delim = opts_out->csv_mode ? "," : "\t"; diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c index 70871ed819..b18aea6376 100644 --- a/src/backend/commands/copyfrom.c +++ b/src/backend/commands/copyfrom.c @@ -752,6 +752,14 @@ CopyFrom(CopyFromState cstate) ti_options |= TABLE_INSERT_FROZEN; } + /* Set up soft error handler for IGNORE_DATATYPE_ERRORS */ + if (cstate->opts.ignore_datatype_errors) + { + ErrorSaveContext escontext = {T_ErrorSaveContext}; + escontext.details_wanted = true; + cstate->escontext = escontext; + } + /* * We need a ResultRelInfo so we can use the regular executor's * index-entry-making machinery. (There used to be a huge amount of code @@ -987,7 +995,36 @@ CopyFrom(CopyFromState cstate) /* Directly store the values/nulls array in the slot */ if (!NextCopyFrom(cstate, econtext, myslot->tts_values, myslot->tts_isnull)) + { + if (cstate->opts.ignore_datatype_errors && + cstate->ignored_errors_count > 0) + ereport(WARNING, + errmsg("%zd rows were skipped due to data type incompatibility", + cstate->ignored_errors_count)); break; + } + + /* Soft error occured, skip this tuple and log the reason */ + if (cstate->escontext.error_occurred) + { + ErrorSaveContext new_escontext = {T_ErrorSaveContext}; + + /* Adjust elevel so we don't jump out */ + cstate->escontext.error_data->elevel = WARNING; + + /* + * Despite the name, this won't raise an error since elevel is + * WARNING now. + */ + ThrowErrorData(cstate->escontext.error_data); + + ExecClearTuple(myslot); + + new_escontext.details_wanted = true; + cstate->escontext = new_escontext; + + continue; + } ExecStoreVirtualTuple(myslot); diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c index f553734582..cf4dad1106 100644 --- a/src/backend/commands/copyfromparse.c +++ b/src/backend/commands/copyfromparse.c @@ -70,6 +70,7 @@ #include "libpq/pqformat.h" #include "mb/pg_wchar.h" #include "miscadmin.h" +#include "nodes/miscnodes.h" #include "pgstat.h" #include "port/pg_bswap.h" #include "utils/builtins.h" @@ -956,10 +957,21 @@ NextCopyFrom(CopyFromState cstate, ExprContext *econtext, values[m] = ExecEvalExpr(defexprs[m], econtext, &nulls[m]); } else - values[m] = InputFunctionCall(&in_functions[m], - string, - typioparams[m], - att->atttypmod); + /* + * If IGNORE_DATATYPE_ERRORS is enabled, skip rows with + * datatype errors. + */ + if (!InputFunctionCallSafe(&in_functions[m], + string, + typioparams[m], + att->atttypmod, + (Node *) &cstate->escontext, + &values[m])) + { + cstate->ignored_errors_count++; + + return true; + } cstate->cur_attname = NULL; cstate->cur_attval = NULL; diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c index 779fdc90cb..2fba51f648 100644 --- a/src/bin/psql/tab-complete.c +++ b/src/bin/psql/tab-complete.c @@ -2869,7 +2869,8 @@ psql_completion(const char *text, int start, int end) else if (Matches("COPY|\\copy", MatchAny, "FROM|TO", MatchAny, "WITH", "(")) COMPLETE_WITH("FORMAT", "FREEZE", "DELIMITER", "NULL", "HEADER", "QUOTE", "ESCAPE", "FORCE_QUOTE", - "FORCE_NOT_NULL", "FORCE_NULL", "ENCODING", "DEFAULT"); + "FORCE_NOT_NULL", "FORCE_NULL", "ENCODING", "DEFAULT", + "IGNORE_DATATYPE_ERRORS"); /* Complete COPY <sth> FROM|TO filename WITH (FORMAT */ else if (Matches("COPY|\\copy", MatchAny, "FROM|TO", MatchAny, "WITH", "(", "FORMAT")) diff --git a/src/include/commands/copy.h b/src/include/commands/copy.h index 33175868f6..c2e55ac21f 100644 --- a/src/include/commands/copy.h +++ b/src/include/commands/copy.h @@ -42,6 +42,7 @@ typedef struct CopyFormatOptions * -1 if not specified */ bool binary; /* binary format? */ bool freeze; /* freeze rows on loading? */ + bool ignore_datatype_errors; /* ignore rows with datatype errors */ bool csv_mode; /* Comma Separated Value format? */ CopyHeaderChoice header_line; /* header line? */ char *null_print; /* NULL marker string (server encoding!) */ diff --git a/src/include/commands/copyfrom_internal.h b/src/include/commands/copyfrom_internal.h index ac2c16f8b8..e5bdae2d25 100644 --- a/src/include/commands/copyfrom_internal.h +++ b/src/include/commands/copyfrom_internal.h @@ -16,6 +16,7 @@ #include "commands/copy.h" #include "commands/trigger.h" +#include "nodes/miscnodes.h" /* * Represents the different source cases we need to worry about at @@ -94,6 +95,8 @@ typedef struct CopyFromStateData * default value */ FmgrInfo *in_functions; /* array of input functions for each attrs */ Oid *typioparams; /* array of element types for in_functions */ + ErrorSaveContext escontext; /* soft error trapper during in_functions execution */ + int64 ignored_errors_count; /* total number of ignored errors */ int *defmap; /* array of default att numbers related to * missing att */ ExprState **defexprs; /* array of default att expressions for all diff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out index faf1a4d1b0..ac9c99f083 100644 --- a/src/test/regress/expected/copy2.out +++ b/src/test/regress/expected/copy2.out @@ -82,6 +82,8 @@ COPY x to stdin (format BINARY, delimiter ','); ERROR: cannot specify DELIMITER in BINARY mode COPY x to stdin (format BINARY, null 'x'); ERROR: cannot specify NULL in BINARY mode +COPY x to stdin (format BINARY, ignore_datatype_errors); +ERROR: cannot specify IGNORE_DATATYPE_ERRORS in BINARY mode COPY x to stdin (format TEXT, force_quote(a)); ERROR: COPY force quote available only in CSV mode COPY x from stdin (format CSV, force_quote(a)); @@ -666,6 +668,30 @@ SELECT * FROM instead_of_insert_tbl; (2 rows) COMMIT; +-- tests for IGNORE_DATATYPE_ERRORS option +CREATE TABLE check_ign_err (n int, m int[], k int); +COPY check_ign_err FROM STDIN WITH (IGNORE_DATATYPE_ERRORS); +WARNING: invalid input syntax for type integer: "a" +WARNING: value "3333333333" is out of range for type integer +WARNING: invalid input syntax for type integer: "a" +WARNING: invalid input syntax for type integer: "" +WARNING: 4 rows were skipped due to data type incompatibility +SELECT * FROM check_ign_err; + n | m | k +---+-----+--- + 1 | {1} | 1 + 5 | {5} | 5 +(2 rows) + +-- test datatype error that can't be handled as soft: should fail +CREATE TABLE hard_err(foo widget); +COPY hard_err FROM STDIN WITH (IGNORE_DATATYPE_ERRORS); +ERROR: invalid input syntax for type widget: "1" +CONTEXT: COPY hard_err, line 1, column foo: "1" +-- test missing data: should fail +COPY check_ign_err FROM STDIN WITH (IGNORE_DATATYPE_ERRORS); +ERROR: missing data for column "k" +CONTEXT: COPY check_ign_err, line 1: "1 {1}" -- clean up DROP TABLE forcetest; DROP TABLE vistest; @@ -680,6 +706,8 @@ DROP TABLE instead_of_insert_tbl; DROP VIEW instead_of_insert_tbl_view; DROP VIEW instead_of_insert_tbl_view_2; DROP FUNCTION fun_instead_of_insert_tbl(); +DROP TABLE check_ign_err; +DROP TABLE hard_err; -- -- COPY FROM ... DEFAULT -- diff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql index d759635068..e8c2c1aca3 100644 --- a/src/test/regress/sql/copy2.sql +++ b/src/test/regress/sql/copy2.sql @@ -70,6 +70,7 @@ COPY x from stdin (encoding 'sql_ascii', encoding 'sql_ascii'); -- incorrect options COPY x to stdin (format BINARY, delimiter ','); COPY x to stdin (format BINARY, null 'x'); +COPY x to stdin (format BINARY, ignore_datatype_errors); COPY x to stdin (format TEXT, force_quote(a)); COPY x from stdin (format CSV, force_quote(a)); COPY x to stdout (format TEXT, force_not_null(a)); @@ -464,6 +465,29 @@ test1 SELECT * FROM instead_of_insert_tbl; COMMIT; +-- tests for IGNORE_DATATYPE_ERRORS option +CREATE TABLE check_ign_err (n int, m int[], k int); +COPY check_ign_err FROM STDIN WITH (IGNORE_DATATYPE_ERRORS); +1 {1} 1 +a {2} 2 +3 {3} 3333333333 +4 {a, 4} 4 + +5 {5} 5 +\. +SELECT * FROM check_ign_err; + +-- test datatype error that can't be handled as soft: should fail +CREATE TABLE hard_err(foo widget); +COPY hard_err FROM STDIN WITH (IGNORE_DATATYPE_ERRORS); +1 +\. + +-- test missing data: should fail +COPY check_ign_err FROM STDIN WITH (IGNORE_DATATYPE_ERRORS); +1 {1} +\. + -- clean up DROP TABLE forcetest; DROP TABLE vistest; @@ -478,6 +502,8 @@ DROP TABLE instead_of_insert_tbl; DROP VIEW instead_of_insert_tbl_view; DROP VIEW instead_of_insert_tbl_view_2; DROP FUNCTION fun_instead_of_insert_tbl(); +DROP TABLE check_ign_err; +DROP TABLE hard_err; -- -- COPY FROM ... DEFAULT -- 2.34.1