> > FWIW, Greenplum has a similar construct (but which also logs the errors > in the > db) where data type errors are skipped as long as the number of errors > don't > exceed a reject limit. If the reject limit is reached then the COPY > fails: > > > > LOG ERRORS [ SEGMENT REJECT LIMIT <count> [ ROWS | PERCENT ]] > > > IIRC the gist of this was to catch then the user copies the wrong input > data or > plain has a broken file. Rather than finding out after copying n rows > which > are likely to be garbage the process can be restarted. >
I think this is a matter for discussion. The same question is: "Where to log errors to separate files or to the system logfile?". IMO it's better for users to log short-detailed error message to system logfile and not output errors to the terminal. This version of the patch has a compiler error in the error message: > Yes, corrected it. Changed "ignored_errors" to int64 because "processed" (used for counting copy rows) is int64. I felt just logging "Error: %ld" would make people wonder the meaning of > the %ld. Logging something like ""Error: %ld data type errors were > found" might be clearer. > Thanks. For more clearance change the message to: "Errors were found: %". Regards, Damir Belyalov Postgres Professional
diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml index c25b52d0cb..706b929947 100644 --- a/doc/src/sgml/ref/copy.sgml +++ b/doc/src/sgml/ref/copy.sgml @@ -34,6 +34,7 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable FORMAT <replaceable class="parameter">format_name</replaceable> FREEZE [ <replaceable class="parameter">boolean</replaceable> ] + IGNORE_DATATYPE_ERRORS [ <replaceable class="parameter">boolean</replaceable> ] DELIMITER '<replaceable class="parameter">delimiter_character</replaceable>' NULL '<replaceable class="parameter">null_string</replaceable>' HEADER [ <replaceable class="parameter">boolean</replaceable> | MATCH ] @@ -233,6 +234,17 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable </listitem> </varlistentry> + <varlistentry> + <term><literal>IGNORE_DATATYPE_ERRORS</literal></term> + <listitem> + <para> + Drops rows that contain malformed data while copying. These are rows + with columns where the data type's input-function raises an error. + Outputs warnings about rows with incorrect data to system logfile. + </para> + </listitem> + </varlistentry> + <varlistentry> <term><literal>DELIMITER</literal></term> <listitem> diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index e34f583ea7..0334894014 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -410,6 +410,7 @@ ProcessCopyOptions(ParseState *pstate, bool format_specified = false; bool freeze_specified = false; bool header_specified = false; + bool ignore_datatype_errors_specified= false; ListCell *option; /* Support external use for option sanity checking */ @@ -449,6 +450,13 @@ ProcessCopyOptions(ParseState *pstate, freeze_specified = true; opts_out->freeze = defGetBoolean(defel); } + else if (strcmp(defel->defname, "ignore_datatype_errors") == 0) + { + if (ignore_datatype_errors_specified) + errorConflictingDefElem(defel, pstate); + ignore_datatype_errors_specified = true; + opts_out->ignore_datatype_errors = defGetBoolean(defel); + } else if (strcmp(defel->defname, "delimiter") == 0) { if (opts_out->delim) diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c index 29cd1cf4a6..facfc44def 100644 --- a/src/backend/commands/copyfrom.c +++ b/src/backend/commands/copyfrom.c @@ -949,10 +949,14 @@ CopyFrom(CopyFromState cstate) errcallback.previous = error_context_stack; error_context_stack = &errcallback; + if (cstate->opts.ignore_datatype_errors) + cstate->ignored_errors = 0; + for (;;) { TupleTableSlot *myslot; bool skip_tuple; + ErrorSaveContext escontext = {T_ErrorSaveContext}; CHECK_FOR_INTERRUPTS(); @@ -985,9 +989,26 @@ CopyFrom(CopyFromState cstate) ExecClearTuple(myslot); + if (cstate->opts.ignore_datatype_errors) + { + escontext.details_wanted = true; + cstate->escontext = escontext; + } + /* Directly store the values/nulls array in the slot */ if (!NextCopyFrom(cstate, econtext, myslot->tts_values, myslot->tts_isnull)) + { + if (cstate->opts.ignore_datatype_errors && cstate->ignored_errors > 0) + ereport(WARNING, errmsg("Errors were found: %lld", (long long) cstate->ignored_errors)); break; + } + + /* Soft error occured, skip this tuple */ + if (cstate->escontext.error_occurred) + { + ExecClearTuple(myslot); + continue; + } ExecStoreVirtualTuple(myslot); diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c index 91b564c2bc..9c36b0dc8b 100644 --- a/src/backend/commands/copyfromparse.c +++ b/src/backend/commands/copyfromparse.c @@ -70,6 +70,7 @@ #include "libpq/pqformat.h" #include "mb/pg_wchar.h" #include "miscadmin.h" +#include "nodes/miscnodes.h" #include "pgstat.h" #include "port/pg_bswap.h" #include "utils/builtins.h" @@ -938,10 +939,23 @@ NextCopyFrom(CopyFromState cstate, ExprContext *econtext, cstate->cur_attname = NameStr(att->attname); cstate->cur_attval = string; - values[m] = InputFunctionCall(&in_functions[m], - string, - typioparams[m], - att->atttypmod); + + /* If IGNORE_DATATYPE_ERRORS is enabled skip rows with datatype errors */ + if (!InputFunctionCallSafe(&in_functions[m], + string, + typioparams[m], + att->atttypmod, + (Node *) &cstate->escontext, + &values[m])) + { + cstate->ignored_errors++; + + ereport(LOG, + errmsg("%s", cstate->escontext.error_data->message)); + + return true; + } + if (string != NULL) nulls[m] = false; cstate->cur_attname = NULL; diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index a0138382a1..d79d293c0d 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -701,7 +701,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); HANDLER HAVING HEADER_P HOLD HOUR_P - IDENTITY_P IF_P ILIKE IMMEDIATE IMMUTABLE IMPLICIT_P IMPORT_P IN_P INCLUDE + IDENTITY_P IF_P IGNORE_DATATYPE_ERRORS ILIKE IMMEDIATE IMMUTABLE IMPLICIT_P IMPORT_P IN_P INCLUDE INCLUDING INCREMENT INDEX INDEXES INHERIT INHERITS INITIALLY INLINE_P INNER_P INOUT INPUT_P INSENSITIVE INSERT INSTEAD INT_P INTEGER INTERSECT INTERVAL INTO INVOKER IS ISNULL ISOLATION @@ -3378,6 +3378,10 @@ copy_opt_item: { $$ = makeDefElem("freeze", (Node *) makeBoolean(true), @1); } + | IGNORE_DATATYPE_ERRORS + { + $$ = makeDefElem("ignore_datatype_errors", (Node *)makeBoolean(true), @1); + } | DELIMITER opt_as Sconst { $$ = makeDefElem("delimiter", (Node *) makeString($3), @1); @@ -16821,6 +16825,7 @@ unreserved_keyword: | HOUR_P | IDENTITY_P | IF_P + | IGNORE_DATATYPE_ERRORS | IMMEDIATE | IMMUTABLE | IMPLICIT_P @@ -17375,6 +17380,7 @@ bare_label_keyword: | HOLD | IDENTITY_P | IF_P + | IGNORE_DATATYPE_ERRORS | ILIKE | IMMEDIATE | IMMUTABLE diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c index 8f12af799b..0f290cd6ff 100644 --- a/src/bin/psql/tab-complete.c +++ b/src/bin/psql/tab-complete.c @@ -2857,7 +2857,8 @@ psql_completion(const char *text, int start, int end) else if (Matches("COPY|\\copy", MatchAny, "FROM|TO", MatchAny, "WITH", "(")) COMPLETE_WITH("FORMAT", "FREEZE", "DELIMITER", "NULL", "HEADER", "QUOTE", "ESCAPE", "FORCE_QUOTE", - "FORCE_NOT_NULL", "FORCE_NULL", "ENCODING"); + "FORCE_NOT_NULL", "FORCE_NULL", "ENCODING", + "IGNORE_DATATYPE_ERRORS"); /* Complete COPY <sth> FROM|TO filename WITH (FORMAT */ else if (Matches("COPY|\\copy", MatchAny, "FROM|TO", MatchAny, "WITH", "(", "FORMAT")) diff --git a/src/include/commands/copy.h b/src/include/commands/copy.h index 8e5f6ff148..a7eb0f8883 100644 --- a/src/include/commands/copy.h +++ b/src/include/commands/copy.h @@ -42,6 +42,7 @@ typedef struct CopyFormatOptions * -1 if not specified */ bool binary; /* binary format? */ bool freeze; /* freeze rows on loading? */ + bool ignore_datatype_errors; /* ignore rows with datatype errors */ bool csv_mode; /* Comma Separated Value format? */ CopyHeaderChoice header_line; /* header line? */ char *null_print; /* NULL marker string (server encoding!) */ diff --git a/src/include/commands/copyfrom_internal.h b/src/include/commands/copyfrom_internal.h index 7b1c4327bd..b9ce636f7b 100644 --- a/src/include/commands/copyfrom_internal.h +++ b/src/include/commands/copyfrom_internal.h @@ -16,6 +16,7 @@ #include "commands/copy.h" #include "commands/trigger.h" +#include "nodes/miscnodes.h" /* * Represents the different source cases we need to worry about at @@ -94,6 +95,8 @@ typedef struct CopyFromStateData AttrNumber num_defaults; FmgrInfo *in_functions; /* array of input functions for each attrs */ Oid *typioparams; /* array of element types for in_functions */ + ErrorSaveContext escontext; /* soft error trapper during in_functions execution */ + int64 ignored_errors; /* total number of ignored errors */ int *defmap; /* array of default att numbers */ ExprState **defexprs; /* array of default att expressions */ bool volatile_defexprs; /* is any of defexprs volatile? */ diff --git a/src/include/parser/kwlist.h b/src/include/parser/kwlist.h index bb36213e6f..1d7f9efbc0 100644 --- a/src/include/parser/kwlist.h +++ b/src/include/parser/kwlist.h @@ -196,6 +196,7 @@ PG_KEYWORD("hold", HOLD, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("hour", HOUR_P, UNRESERVED_KEYWORD, AS_LABEL) PG_KEYWORD("identity", IDENTITY_P, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("if", IF_P, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("ignore_datatype_errors", IGNORE_DATATYPE_ERRORS, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("ilike", ILIKE, TYPE_FUNC_NAME_KEYWORD, BARE_LABEL) PG_KEYWORD("immediate", IMMEDIATE, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("immutable", IMMUTABLE, UNRESERVED_KEYWORD, BARE_LABEL) diff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out index 090ef6c7a8..525e3bc454 100644 --- a/src/test/regress/expected/copy2.out +++ b/src/test/regress/expected/copy2.out @@ -666,6 +666,17 @@ SELECT * FROM instead_of_insert_tbl; (2 rows) COMMIT; +-- tests for IGNORE_DATATYPE_ERRORS option +CREATE TABLE check_ign_err (n int, m int[], k int); +COPY check_ign_err FROM STDIN WITH IGNORE_DATATYPE_ERRORS; +WARNING: Errors were found: 4 +SELECT * FROM check_ign_err; + n | m | k +---+-----+--- + 1 | {1} | 1 + 5 | {5} | 5 +(2 rows) + -- clean up DROP TABLE forcetest; DROP TABLE vistest; diff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql index b0de82c3aa..380adfce96 100644 --- a/src/test/regress/sql/copy2.sql +++ b/src/test/regress/sql/copy2.sql @@ -464,6 +464,18 @@ test1 SELECT * FROM instead_of_insert_tbl; COMMIT; +-- tests for IGNORE_DATATYPE_ERRORS option +CREATE TABLE check_ign_err (n int, m int[], k int); +COPY check_ign_err FROM STDIN WITH IGNORE_DATATYPE_ERRORS; +1 {1} 1 +a {2} 2 +3 {3} 3333333333 +4 {a, 4} 4 + +5 {5} 5 +\. +SELECT * FROM check_ign_err; + -- clean up DROP TABLE forcetest; DROP TABLE vistest;