On 2023-03-17 21:23, torikoshia wrote:
On 2023-03-07 18:09, Daniel Gustafsson wrote:
On 7 Mar 2023, at 09:35, Damir Belyalov <dam.be...@gmail.com> wrote:
I felt just logging "Error: %ld" would make people wonder the meaning
of
the %ld. Logging something like ""Error: %ld data type errors were
found" might be clearer.
Thanks. For more clearance change the message to: "Errors were found:
%".
I'm not convinced that this adds enough clarity to assist the user.
We also
shouldn't use "error" in a WARNING log since the user has explicitly
asked to
skip rows on error, so it's not an error per se.
+1
How about something like:
ereport(WARNING,
(errmsg("%ld rows were skipped due to data type
incompatibility", cstate->ignored_errors),
errhint("Skipped rows can be inspected in the database log
for reprocessing.")));
Since skipped rows cannot be inspected in the log when
log_error_verbosity is set to terse,
it might be better without this errhint.
Removed errhint.
Modified some codes since v3 couldn't be applied HEAD anymore.
Also modified v3 patch as below:
65 + if (cstate->opts.ignore_datatype_errors)
66 + cstate->ignored_errors = 0;
67 +
It seems not necessary since cstate is initialized by palloc0() in
BeginCopyFrom().
134 + ereport(LOG,
135 + errmsg("%s",
cstate->escontext.error_data->message));
136 +
137 + return true;
Since LOG means 'Reports information of interest to administrators'
according to the manual[1], datatype error should not be logged as
LOG. I put it back in WARNING.
[1]
https://www.postgresql.org/docs/current/runtime-config-logging.html#RUNTIME-CONFIG-SEVERITY-LEVELS
--
Regards,
--
Atsushi Torikoshi
NTT DATA CORPORATION
From 6764d7e0f21ca266d7426cb922fd00e5138ec857 Mon Sep 17 00:00:00 2001
From: Atsushi Torikoshi <torikos...@oss.nttdata.com>
Date: Wed, 22 Mar 2023 22:00:15 +0900
Subject: [PATCH v4] Add new COPY option IGNORE_DATATYPE_ERRORS
Add new COPY option IGNORE_DATATYPE_ERRORS.
Currently entire COPY fails even when there is one unexpected
data regarding data type or range.
IGNORE_DATATYPE_ERRORS ignores these errors and skips them and
COPY data which don't contain problem.
This patch uses the soft error handling infrastructure, which
is introduced by d9f7f5d32f20.
Author: Damir Belyalov, Atsushi Torikoshi
---
doc/src/sgml/ref/copy.sgml | 12 ++++++++++++
src/backend/commands/copy.c | 8 ++++++++
src/backend/commands/copyfrom.c | 20 ++++++++++++++++++++
src/backend/commands/copyfromparse.c | 19 +++++++++++++++----
src/backend/parser/gram.y | 8 +++++++-
src/include/commands/copy.h | 1 +
src/include/commands/copyfrom_internal.h | 3 +++
src/include/parser/kwlist.h | 1 +
src/test/regress/expected/copy2.out | 15 +++++++++++++++
src/test/regress/sql/copy2.sql | 12 ++++++++++++
10 files changed, 94 insertions(+), 5 deletions(-)
diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml
index 5e591ed2e6..168b1c05d9 100644
--- a/doc/src/sgml/ref/copy.sgml
+++ b/doc/src/sgml/ref/copy.sgml
@@ -34,6 +34,7 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
FORMAT <replaceable class="parameter">format_name</replaceable>
FREEZE [ <replaceable class="parameter">boolean</replaceable> ]
+ IGNORE_DATATYPE_ERRORS [ <replaceable class="parameter">boolean</replaceable> ]
DELIMITER '<replaceable class="parameter">delimiter_character</replaceable>'
NULL '<replaceable class="parameter">null_string</replaceable>'
HEADER [ <replaceable class="parameter">boolean</replaceable> | MATCH ]
@@ -234,6 +235,17 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
</listitem>
</varlistentry>
+ <varlistentry>
+ <term><literal>IGNORE_DATATYPE_ERRORS</literal></term>
+ <listitem>
+ <para>
+ Drops rows that contain malformed data while copying. These are rows
+ with columns where the data type's input-function raises an error.
+ Outputs warnings about rows with incorrect data to system logfile.
+ </para>
+ </listitem>
+ </varlistentry>
+
<varlistentry>
<term><literal>DELIMITER</literal></term>
<listitem>
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index f14fae3308..02d911abbe 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -419,6 +419,7 @@ ProcessCopyOptions(ParseState *pstate,
bool format_specified = false;
bool freeze_specified = false;
bool header_specified = false;
+ bool ignore_datatype_errors_specified = false;
ListCell *option;
/* Support external use for option sanity checking */
@@ -458,6 +459,13 @@ ProcessCopyOptions(ParseState *pstate,
freeze_specified = true;
opts_out->freeze = defGetBoolean(defel);
}
+ else if (strcmp(defel->defname, "ignore_datatype_errors") == 0)
+ {
+ if (ignore_datatype_errors_specified)
+ errorConflictingDefElem(defel, pstate);
+ ignore_datatype_errors_specified = true;
+ opts_out->ignore_datatype_errors = defGetBoolean(defel);
+ }
else if (strcmp(defel->defname, "delimiter") == 0)
{
if (opts_out->delim)
diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c
index 80bca79cd0..85c47f54b2 100644
--- a/src/backend/commands/copyfrom.c
+++ b/src/backend/commands/copyfrom.c
@@ -953,6 +953,7 @@ CopyFrom(CopyFromState cstate)
{
TupleTableSlot *myslot;
bool skip_tuple;
+ ErrorSaveContext escontext = {T_ErrorSaveContext};
CHECK_FOR_INTERRUPTS();
@@ -985,9 +986,28 @@ CopyFrom(CopyFromState cstate)
ExecClearTuple(myslot);
+ if (cstate->opts.ignore_datatype_errors)
+ {
+ escontext.details_wanted = true;
+ cstate->escontext = escontext;
+ }
+
/* Directly store the values/nulls array in the slot */
if (!NextCopyFrom(cstate, econtext, myslot->tts_values, myslot->tts_isnull))
+ {
+ if (cstate->opts.ignore_datatype_errors && cstate->ignored_errors > 0)
+ ereport(WARNING,
+ errmsg("%zd rows were skipped due to data type incompatibility",
+ cstate->ignored_errors));
break;
+ }
+
+ /* Soft error occured, skip this tuple */
+ if (cstate->escontext.error_occurred)
+ {
+ ExecClearTuple(myslot);
+ continue;
+ }
ExecStoreVirtualTuple(myslot);
diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c
index 3853902a16..b06c44e298 100644
--- a/src/backend/commands/copyfromparse.c
+++ b/src/backend/commands/copyfromparse.c
@@ -70,6 +70,7 @@
#include "libpq/pqformat.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
+#include "nodes/miscnodes.h"
#include "pgstat.h"
#include "port/pg_bswap.h"
#include "utils/builtins.h"
@@ -956,10 +957,20 @@ NextCopyFrom(CopyFromState cstate, ExprContext *econtext,
values[m] = ExecEvalExpr(defexprs[m], econtext, &nulls[m]);
}
else
- values[m] = InputFunctionCall(&in_functions[m],
- string,
- typioparams[m],
- att->atttypmod);
+ /* If IGNORE_DATATYPE_ERRORS is enabled skip rows with datatype errors */
+ if (!InputFunctionCallSafe(&in_functions[m],
+ string,
+ typioparams[m],
+ att->atttypmod,
+ (Node *) &cstate->escontext,
+ &values[m]))
+ {
+ cstate->ignored_errors++;
+
+ ereport(WARNING,
+ errmsg("%s", cstate->escontext.error_data->message));
+ return true;
+ }
cstate->cur_attname = NULL;
cstate->cur_attval = NULL;
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index efe88ccf9d..22bf63b42b 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -701,7 +701,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
HANDLER HAVING HEADER_P HOLD HOUR_P
- IDENTITY_P IF_P ILIKE IMMEDIATE IMMUTABLE IMPLICIT_P IMPORT_P IN_P INCLUDE
+ IDENTITY_P IF_P IGNORE_DATATYPE_ERRORS ILIKE IMMEDIATE IMMUTABLE IMPLICIT_P IMPORT_P IN_P INCLUDE
INCLUDING INCREMENT INDENT INDEX INDEXES INHERIT INHERITS INITIALLY INLINE_P
INNER_P INOUT INPUT_P INSENSITIVE INSERT INSTEAD INT_P INTEGER
INTERSECT INTERVAL INTO INVOKER IS ISNULL ISOLATION
@@ -3378,6 +3378,10 @@ copy_opt_item:
{
$$ = makeDefElem("freeze", (Node *) makeBoolean(true), @1);
}
+ | IGNORE_DATATYPE_ERRORS
+ {
+ $$ = makeDefElem("ignore_datatype_errors", (Node *)makeBoolean(true), @1);
+ }
| DELIMITER opt_as Sconst
{
$$ = makeDefElem("delimiter", (Node *) makeString($3), @1);
@@ -16827,6 +16831,7 @@ unreserved_keyword:
| HOUR_P
| IDENTITY_P
| IF_P
+ | IGNORE_DATATYPE_ERRORS
| IMMEDIATE
| IMMUTABLE
| IMPLICIT_P
@@ -17382,6 +17387,7 @@ bare_label_keyword:
| HOLD
| IDENTITY_P
| IF_P
+ | IGNORE_DATATYPE_ERRORS
| ILIKE
| IMMEDIATE
| IMMUTABLE
diff --git a/src/include/commands/copy.h b/src/include/commands/copy.h
index 33175868f6..c2e55ac21f 100644
--- a/src/include/commands/copy.h
+++ b/src/include/commands/copy.h
@@ -42,6 +42,7 @@ typedef struct CopyFormatOptions
* -1 if not specified */
bool binary; /* binary format? */
bool freeze; /* freeze rows on loading? */
+ bool ignore_datatype_errors; /* ignore rows with datatype errors */
bool csv_mode; /* Comma Separated Value format? */
CopyHeaderChoice header_line; /* header line? */
char *null_print; /* NULL marker string (server encoding!) */
diff --git a/src/include/commands/copyfrom_internal.h b/src/include/commands/copyfrom_internal.h
index ac2c16f8b8..1164c71631 100644
--- a/src/include/commands/copyfrom_internal.h
+++ b/src/include/commands/copyfrom_internal.h
@@ -16,6 +16,7 @@
#include "commands/copy.h"
#include "commands/trigger.h"
+#include "nodes/miscnodes.h"
/*
* Represents the different source cases we need to worry about at
@@ -94,6 +95,8 @@ typedef struct CopyFromStateData
* default value */
FmgrInfo *in_functions; /* array of input functions for each attrs */
Oid *typioparams; /* array of element types for in_functions */
+ ErrorSaveContext escontext; /* soft error trapper during in_functions execution */
+ int64 ignored_errors; /* total number of ignored errors */
int *defmap; /* array of default att numbers related to
* missing att */
ExprState **defexprs; /* array of default att expressions for all
diff --git a/src/include/parser/kwlist.h b/src/include/parser/kwlist.h
index 753e9ee174..5ea159e879 100644
--- a/src/include/parser/kwlist.h
+++ b/src/include/parser/kwlist.h
@@ -196,6 +196,7 @@ PG_KEYWORD("hold", HOLD, UNRESERVED_KEYWORD, BARE_LABEL)
PG_KEYWORD("hour", HOUR_P, UNRESERVED_KEYWORD, AS_LABEL)
PG_KEYWORD("identity", IDENTITY_P, UNRESERVED_KEYWORD, BARE_LABEL)
PG_KEYWORD("if", IF_P, UNRESERVED_KEYWORD, BARE_LABEL)
+PG_KEYWORD("ignore_datatype_errors", IGNORE_DATATYPE_ERRORS, UNRESERVED_KEYWORD, BARE_LABEL)
PG_KEYWORD("ilike", ILIKE, TYPE_FUNC_NAME_KEYWORD, BARE_LABEL)
PG_KEYWORD("immediate", IMMEDIATE, UNRESERVED_KEYWORD, BARE_LABEL)
PG_KEYWORD("immutable", IMMUTABLE, UNRESERVED_KEYWORD, BARE_LABEL)
diff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out
index 8e33eee719..a6bf3d66a4 100644
--- a/src/test/regress/expected/copy2.out
+++ b/src/test/regress/expected/copy2.out
@@ -666,6 +666,21 @@ SELECT * FROM instead_of_insert_tbl;
(2 rows)
COMMIT;
+-- tests for IGNORE_DATATYPE_ERRORS option
+CREATE TABLE check_ign_err (n int, m int[], k int);
+COPY check_ign_err FROM STDIN WITH IGNORE_DATATYPE_ERRORS;
+WARNING: invalid input syntax for type integer: "a"
+WARNING: value "3333333333" is out of range for type integer
+WARNING: invalid input syntax for type integer: "a"
+WARNING: invalid input syntax for type integer: ""
+WARNING: 4 rows were skipped due to data type incompatibility
+SELECT * FROM check_ign_err;
+ n | m | k
+---+-----+---
+ 1 | {1} | 1
+ 5 | {5} | 5
+(2 rows)
+
-- clean up
DROP TABLE forcetest;
DROP TABLE vistest;
diff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql
index d759635068..c934029314 100644
--- a/src/test/regress/sql/copy2.sql
+++ b/src/test/regress/sql/copy2.sql
@@ -464,6 +464,18 @@ test1
SELECT * FROM instead_of_insert_tbl;
COMMIT;
+-- tests for IGNORE_DATATYPE_ERRORS option
+CREATE TABLE check_ign_err (n int, m int[], k int);
+COPY check_ign_err FROM STDIN WITH IGNORE_DATATYPE_ERRORS;
+1 {1} 1
+a {2} 2
+3 {3} 3333333333
+4 {a, 4} 4
+
+5 {5} 5
+\.
+SELECT * FROM check_ign_err;
+
-- clean up
DROP TABLE forcetest;
DROP TABLE vistest;
base-commit: d69c404c4cc5985d8ae5b5ed38bed3400b317f82
--
2.25.1