Hi,
As described in 9e2d870119, COPY ON_EEOR is expected to have more
"error_action".
(Note that option name was changed by b725b7eec)
I'd like to have a new option "log", which skips soft errors and logs
information that should have resulted in errors to PostgreSQL log.
I think this option has some advantages like below:
1) We can know which number of line input data was not loaded and
reason.
Example:
=# copy t1 from stdin with (on_error log);
Enter data to be copied followed by a newline.
End with a backslash and a period on a line by itself, or an EOF
signal.
>> 1
>> 2
>> 3
>> z
>> \.
LOG: invalid input syntax for type integer: "z"
NOTICE: 1 row was skipped due to data type incompatibility
COPY 3
=# \! tail data/log/postgresql*.log
LOG: 22P02: invalid input syntax for type integer: "z"
CONTEXT: COPY t1, line 4, column i: "z"
LOCATION: pg_strtoint32_safe, numutils.c:620
STATEMENT: copy t1 from stdin with (on_error log);
2) Easier maintenance than storing error information in tables or
proprietary log files.
For example, in case a large number of soft errors occur, some
mechanisms are needed to prevent an infinite increase in the size of the
destination data, but we can left it to PostgreSQL's log rotation.
Attached a patch.
This basically comes from previous discussion[1] which did both "ignore"
and "log" soft error.
As shown in the example above, the log output to the client does not
contain CONTEXT, so I'm a little concerned that client cannot see what
line of the input data had a problem without looking at the server log.
What do you think?
[1]
https://www.postgresql.org/message-id/c0fb57b82b150953f26a5c7e340412e8%40oss.nttdata.com
--
Regards,
--
Atsushi Torikoshi
NTT DATA Group Corporation
From 04e643facfea4b4e8dd174d22fbe5e008747a91a Mon Sep 17 00:00:00 2001
From: Atsushi Torikoshi <torikos...@oss.nttdata.com>
Date: Fri, 26 Jan 2024 01:17:59 +0900
Subject: [PATCH v1] Add new error_action "log" to ON_ERROR option
Currently ON_ERROR option only has "ignore" to skip malformed data and
there are no ways to know where and why COPY skipped them.
"log" skips malformed data as well as "ignore", but it logs information that
should have resulted in errors to PostgreSQL log.
---
doc/src/sgml/ref/copy.sgml | 8 ++++++--
src/backend/commands/copy.c | 4 +++-
src/backend/commands/copyfrom.c | 24 ++++++++++++++++++++----
src/include/commands/copy.h | 1 +
src/test/regress/expected/copy2.out | 14 +++++++++-----
src/test/regress/sql/copy2.sql | 9 +++++++++
6 files changed, 48 insertions(+), 12 deletions(-)
diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml
index 21a5c4a052..9662c90a8b 100644
--- a/doc/src/sgml/ref/copy.sgml
+++ b/doc/src/sgml/ref/copy.sgml
@@ -380,12 +380,16 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
<para>
Specifies which <replaceable class="parameter">
error_action</replaceable> to perform when there is malformed data in the input.
- Currently, only <literal>stop</literal> (default) and <literal>ignore</literal>
- values are supported.
+ Currently, only <literal>stop</literal> (default), <literal>ignore</literal>
+ and <literal>log</literal> values are supported.
If the <literal>stop</literal> value is specified,
<command>COPY</command> stops operation at the first error.
If the <literal>ignore</literal> value is specified,
<command>COPY</command> skips malformed data and continues copying data.
+ If the <literal>log</literal> value is specified,
+ <command>COPY</command> behaves the same as <literal>ignore</literal>, exept that
+ it logs information that should have resulted in errors to PostgreSQL log at
+ <literal>INFO</literal> level.
The option is allowed only in <command>COPY FROM</command>.
Only <literal>stop</literal> value is allowed when
using <literal>binary</literal> format.
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index cc0786c6f4..812ca63350 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -415,13 +415,15 @@ defGetCopyOnErrorChoice(DefElem *def, ParseState *pstate, bool is_from)
return COPY_ON_ERROR_STOP;
/*
- * Allow "stop", or "ignore" values.
+ * Allow "stop", "ignore" or "log" values.
*/
sval = defGetString(def);
if (pg_strcasecmp(sval, "stop") == 0)
return COPY_ON_ERROR_STOP;
if (pg_strcasecmp(sval, "ignore") == 0)
return COPY_ON_ERROR_IGNORE;
+ if (pg_strcasecmp(sval, "log") == 0)
+ return COPY_ON_ERROR_LOG;
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c
index 1fe70b9133..7886bd5353 100644
--- a/src/backend/commands/copyfrom.c
+++ b/src/backend/commands/copyfrom.c
@@ -1013,6 +1013,23 @@ CopyFrom(CopyFromState cstate)
*/
cstate->escontext->error_occurred = false;
+ else if (cstate->opts.on_error == COPY_ON_ERROR_LOG)
+ {
+ /* Adjust elevel so we don't jump out */
+ cstate->escontext->error_data->elevel = LOG;
+
+ /*
+ * Despite the name, this won't raise an error since elevel is
+ * LOG now.
+ */
+ ThrowErrorData(cstate->escontext->error_data);
+
+ /* Initialize escontext in preparation for next soft error */
+ cstate->escontext->error_occurred = false;
+ cstate->escontext->details_wanted = true;
+ memset(cstate->escontext->error_data, 0, sizeof(ErrorData));
+ }
+
/* Report that this tuple was skipped by the ON_ERROR clause */
pgstat_progress_update_param(PROGRESS_COPY_TUPLES_SKIPPED,
++skipped);
@@ -1462,12 +1479,11 @@ BeginCopyFrom(ParseState *pstate,
cstate->escontext->type = T_ErrorSaveContext;
cstate->escontext->error_occurred = false;
- /*
- * Currently we only support COPY_ON_ERROR_IGNORE. We'll add other
- * options later
- */
+ /* Error Details are required except when "ignore" is specified */
if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE)
cstate->escontext->details_wanted = false;
+ else
+ cstate->escontext->details_wanted = true;
}
else
cstate->escontext = NULL;
diff --git a/src/include/commands/copy.h b/src/include/commands/copy.h
index b3da3cb0be..c61ac2445f 100644
--- a/src/include/commands/copy.h
+++ b/src/include/commands/copy.h
@@ -38,6 +38,7 @@ typedef enum CopyOnErrorChoice
{
COPY_ON_ERROR_STOP = 0, /* immediately throw errors, default */
COPY_ON_ERROR_IGNORE, /* ignore errors */
+ COPY_ON_ERROR_LOG, /* save error to PostgreSQL log */
} CopyOnErrorChoice;
/*
diff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out
index 25c401ce34..dc3ac2b494 100644
--- a/src/test/regress/expected/copy2.out
+++ b/src/test/regress/expected/copy2.out
@@ -731,12 +731,16 @@ ERROR: invalid input syntax for type integer: "a"
CONTEXT: COPY check_ign_err, line 2, column n: "a"
COPY check_ign_err FROM STDIN WITH (on_error ignore);
NOTICE: 4 rows were skipped due to data type incompatibility
+COPY check_ign_err FROM STDIN WITH (on_error log);
+NOTICE: 4 rows were skipped due to data type incompatibility
SELECT * FROM check_ign_err;
- n | m | k
----+-----+---
- 1 | {1} | 1
- 5 | {5} | 5
-(2 rows)
+ n | m | k
+----+------+----
+ 1 | {1} | 1
+ 5 | {5} | 5
+ 6 | {6} | 6
+ 10 | {10} | 10
+(4 rows)
-- test datatype error that can't be handled as soft: should fail
CREATE TABLE hard_err(foo widget);
diff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql
index b5e549e856..54e1bc7f91 100644
--- a/src/test/regress/sql/copy2.sql
+++ b/src/test/regress/sql/copy2.sql
@@ -516,6 +516,15 @@ a {2} 2
5 {5} 5
\.
+
+COPY check_ign_err FROM STDIN WITH (on_error log);
+6 {6} 6
+a {7} 7
+8 {8} 8888888888
+9 {a, 9} 9
+
+10 {10} 10
+\.
SELECT * FROM check_ign_err;
-- test datatype error that can't be handled as soft: should fail
base-commit: 66ea94e8e606529bb334515f388c62314956739e
--
2.39.2