Since v5 patch failed applying anymore, updated the patch.
On 2023-03-23 02:50, Andres Freund wrote:
I suggest adding a few more tests:
- COPY with a datatype error that can't be handled as a soft error
I didn't know proper way to test this, but I've found data type widget's
input function widget_in() defined to occur hard-error in regress.c,
attached patch added a test using it.
--
Atsushi Torikoshi
NTT DATA CORPORATION
From de00c1555e0ee4a61565346946f4f3a4e851252c Mon Sep 17 00:00:00 2001
From: Atsushi Torikoshi <torikos...@oss.nttdata.com>
Date: Mon, 21 Aug 2023 20:30:29 +0900
Subject: [PATCH v6] Add new COPY option IGNORE_DATATYPE_ERRORS
Currently entire COPY fails even when there is one unexpected data
regarding data type or range.
IGNORE_DATATYPE_ERRORS ignores these errors and skips them and COPY
data which don't contain problem.
This patch uses the soft error handling infrastructure, which is
introduced by d9f7f5d32f20.
Author: Damir Belyalov, Atsushi Torikoshi
---
doc/src/sgml/ref/copy.sgml | 13 +++++++++
src/backend/commands/copy.c | 13 +++++++++
src/backend/commands/copyfrom.c | 37 ++++++++++++++++++++++++
src/backend/commands/copyfromparse.c | 19 +++++++++---
src/bin/psql/tab-complete.c | 3 +-
src/include/commands/copy.h | 1 +
src/include/commands/copyfrom_internal.h | 3 ++
src/test/regress/expected/copy2.out | 28 ++++++++++++++++++
src/test/regress/sql/copy2.sql | 27 +++++++++++++++++
9 files changed, 139 insertions(+), 5 deletions(-)
diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml
index 4d614a0225..d5cdbb4025 100644
--- a/doc/src/sgml/ref/copy.sgml
+++ b/doc/src/sgml/ref/copy.sgml
@@ -43,6 +43,7 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
FORCE_QUOTE { ( <replaceable class="parameter">column_name</replaceable> [, ...] ) | * }
FORCE_NOT_NULL ( <replaceable class="parameter">column_name</replaceable> [, ...] )
FORCE_NULL ( <replaceable class="parameter">column_name</replaceable> [, ...] )
+ IGNORE_DATATYPE_ERRORS [ <replaceable class="parameter">boolean</replaceable> ]
ENCODING '<replaceable class="parameter">encoding_name</replaceable>'
</synopsis>
</refsynopsisdiv>
@@ -370,6 +371,18 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
</listitem>
</varlistentry>
+ <varlistentry>
+ <term><literal>IGNORE_DATATYPE_ERRORS</literal></term>
+ <listitem>
+ <para>
+ Drops rows that contain malformed data while copying. These are rows
+ with columns where the data type's input-function raises an error.
+ This option is not allowed when using binary format. Note that this
+ is only supported in current <command>COPY</command> syntax.
+ </para>
+ </listitem>
+ </varlistentry>
+
<varlistentry>
<term><literal>ENCODING</literal></term>
<listitem>
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index f14fae3308..beb73f5357 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -419,6 +419,7 @@ ProcessCopyOptions(ParseState *pstate,
bool format_specified = false;
bool freeze_specified = false;
bool header_specified = false;
+ bool ignore_datatype_errors_specified = false;
ListCell *option;
/* Support external use for option sanity checking */
@@ -458,6 +459,13 @@ ProcessCopyOptions(ParseState *pstate,
freeze_specified = true;
opts_out->freeze = defGetBoolean(defel);
}
+ else if (strcmp(defel->defname, "ignore_datatype_errors") == 0)
+ {
+ if (ignore_datatype_errors_specified)
+ errorConflictingDefElem(defel, pstate);
+ ignore_datatype_errors_specified = true;
+ opts_out->ignore_datatype_errors = defGetBoolean(defel);
+ }
else if (strcmp(defel->defname, "delimiter") == 0)
{
if (opts_out->delim)
@@ -594,6 +602,11 @@ ProcessCopyOptions(ParseState *pstate,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("cannot specify DEFAULT in BINARY mode")));
+ if (opts_out->binary && opts_out->ignore_datatype_errors)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("cannot specify IGNORE_DATATYPE_ERRORS in BINARY mode")));
+
/* Set defaults for omitted options */
if (!opts_out->delim)
opts_out->delim = opts_out->csv_mode ? "," : "\t";
diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c
index b47cb5c66d..853adb8414 100644
--- a/src/backend/commands/copyfrom.c
+++ b/src/backend/commands/copyfrom.c
@@ -752,6 +752,14 @@ CopyFrom(CopyFromState cstate)
ti_options |= TABLE_INSERT_FROZEN;
}
+ /* Set up soft error handler for IGNORE_DATATYPE_ERRORS */
+ if (cstate->opts.ignore_datatype_errors)
+ {
+ ErrorSaveContext escontext = {T_ErrorSaveContext};
+ escontext.details_wanted = true;
+ cstate->escontext = escontext;
+ }
+
/*
* We need a ResultRelInfo so we can use the regular executor's
* index-entry-making machinery. (There used to be a huge amount of code
@@ -987,7 +995,36 @@ CopyFrom(CopyFromState cstate)
/* Directly store the values/nulls array in the slot */
if (!NextCopyFrom(cstate, econtext, myslot->tts_values, myslot->tts_isnull))
+ {
+ if (cstate->opts.ignore_datatype_errors &&
+ cstate->ignored_error_count > 0)
+ ereport(WARNING,
+ errmsg("%zd rows were skipped due to data type incompatibility",
+ cstate->ignored_error_count));
break;
+ }
+
+ /* Soft error occured, skip this tuple and log the reason */
+ if (cstate->escontext.error_occurred)
+ {
+ ErrorSaveContext new_escontext = {T_ErrorSaveContext};
+
+ /* Adjust elevel so we don't jump out */
+ cstate->escontext.error_data->elevel = WARNING;
+
+ /*
+ * Despite the name, this won't raise an error since elevel is
+ * WARNING now.
+ */
+ ThrowErrorData(cstate->escontext.error_data);
+
+ ExecClearTuple(myslot);
+
+ new_escontext.details_wanted = true;
+ cstate->escontext = new_escontext;
+
+ continue;
+ }
ExecStoreVirtualTuple(myslot);
diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c
index 232768a6e1..e44b555f48 100644
--- a/src/backend/commands/copyfromparse.c
+++ b/src/backend/commands/copyfromparse.c
@@ -70,6 +70,7 @@
#include "libpq/pqformat.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
+#include "nodes/miscnodes.h"
#include "pgstat.h"
#include "port/pg_bswap.h"
#include "utils/builtins.h"
@@ -956,10 +957,20 @@ NextCopyFrom(CopyFromState cstate, ExprContext *econtext,
values[m] = ExecEvalExpr(defexprs[m], econtext, &nulls[m]);
}
else
- values[m] = InputFunctionCall(&in_functions[m],
- string,
- typioparams[m],
- att->atttypmod);
+ /*
+ * If IGNORE_DATATYPE_ERRORS is enabled, skip rows with
+ * datatype errors. */
+ if (!InputFunctionCallSafe(&in_functions[m],
+ string,
+ typioparams[m],
+ att->atttypmod,
+ (Node *) &cstate->escontext,
+ &values[m]))
+ {
+ cstate->ignored_error_count++;
+
+ return true;
+ }
cstate->cur_attname = NULL;
cstate->cur_attval = NULL;
diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c
index 779fdc90cb..2fba51f648 100644
--- a/src/bin/psql/tab-complete.c
+++ b/src/bin/psql/tab-complete.c
@@ -2869,7 +2869,8 @@ psql_completion(const char *text, int start, int end)
else if (Matches("COPY|\\copy", MatchAny, "FROM|TO", MatchAny, "WITH", "("))
COMPLETE_WITH("FORMAT", "FREEZE", "DELIMITER", "NULL",
"HEADER", "QUOTE", "ESCAPE", "FORCE_QUOTE",
- "FORCE_NOT_NULL", "FORCE_NULL", "ENCODING", "DEFAULT");
+ "FORCE_NOT_NULL", "FORCE_NULL", "ENCODING", "DEFAULT",
+ "IGNORE_DATATYPE_ERRORS");
/* Complete COPY <sth> FROM|TO filename WITH (FORMAT */
else if (Matches("COPY|\\copy", MatchAny, "FROM|TO", MatchAny, "WITH", "(", "FORMAT"))
diff --git a/src/include/commands/copy.h b/src/include/commands/copy.h
index 33175868f6..c2e55ac21f 100644
--- a/src/include/commands/copy.h
+++ b/src/include/commands/copy.h
@@ -42,6 +42,7 @@ typedef struct CopyFormatOptions
* -1 if not specified */
bool binary; /* binary format? */
bool freeze; /* freeze rows on loading? */
+ bool ignore_datatype_errors; /* ignore rows with datatype errors */
bool csv_mode; /* Comma Separated Value format? */
CopyHeaderChoice header_line; /* header line? */
char *null_print; /* NULL marker string (server encoding!) */
diff --git a/src/include/commands/copyfrom_internal.h b/src/include/commands/copyfrom_internal.h
index ac2c16f8b8..d5801df06c 100644
--- a/src/include/commands/copyfrom_internal.h
+++ b/src/include/commands/copyfrom_internal.h
@@ -16,6 +16,7 @@
#include "commands/copy.h"
#include "commands/trigger.h"
+#include "nodes/miscnodes.h"
/*
* Represents the different source cases we need to worry about at
@@ -94,6 +95,8 @@ typedef struct CopyFromStateData
* default value */
FmgrInfo *in_functions; /* array of input functions for each attrs */
Oid *typioparams; /* array of element types for in_functions */
+ ErrorSaveContext escontext; /* soft error trapper during in_functions execution */
+ int64 ignored_error_count; /* total number of ignored errors */
int *defmap; /* array of default att numbers related to
* missing att */
ExprState **defexprs; /* array of default att expressions for all
diff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out
index faf1a4d1b0..ac9c99f083 100644
--- a/src/test/regress/expected/copy2.out
+++ b/src/test/regress/expected/copy2.out
@@ -82,6 +82,8 @@ COPY x to stdin (format BINARY, delimiter ',');
ERROR: cannot specify DELIMITER in BINARY mode
COPY x to stdin (format BINARY, null 'x');
ERROR: cannot specify NULL in BINARY mode
+COPY x to stdin (format BINARY, ignore_datatype_errors);
+ERROR: cannot specify IGNORE_DATATYPE_ERRORS in BINARY mode
COPY x to stdin (format TEXT, force_quote(a));
ERROR: COPY force quote available only in CSV mode
COPY x from stdin (format CSV, force_quote(a));
@@ -666,6 +668,30 @@ SELECT * FROM instead_of_insert_tbl;
(2 rows)
COMMIT;
+-- tests for IGNORE_DATATYPE_ERRORS option
+CREATE TABLE check_ign_err (n int, m int[], k int);
+COPY check_ign_err FROM STDIN WITH (IGNORE_DATATYPE_ERRORS);
+WARNING: invalid input syntax for type integer: "a"
+WARNING: value "3333333333" is out of range for type integer
+WARNING: invalid input syntax for type integer: "a"
+WARNING: invalid input syntax for type integer: ""
+WARNING: 4 rows were skipped due to data type incompatibility
+SELECT * FROM check_ign_err;
+ n | m | k
+---+-----+---
+ 1 | {1} | 1
+ 5 | {5} | 5
+(2 rows)
+
+-- test datatype error that can't be handled as soft: should fail
+CREATE TABLE hard_err(foo widget);
+COPY hard_err FROM STDIN WITH (IGNORE_DATATYPE_ERRORS);
+ERROR: invalid input syntax for type widget: "1"
+CONTEXT: COPY hard_err, line 1, column foo: "1"
+-- test missing data: should fail
+COPY check_ign_err FROM STDIN WITH (IGNORE_DATATYPE_ERRORS);
+ERROR: missing data for column "k"
+CONTEXT: COPY check_ign_err, line 1: "1 {1}"
-- clean up
DROP TABLE forcetest;
DROP TABLE vistest;
@@ -680,6 +706,8 @@ DROP TABLE instead_of_insert_tbl;
DROP VIEW instead_of_insert_tbl_view;
DROP VIEW instead_of_insert_tbl_view_2;
DROP FUNCTION fun_instead_of_insert_tbl();
+DROP TABLE check_ign_err;
+DROP TABLE hard_err;
--
-- COPY FROM ... DEFAULT
--
diff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql
index d759635068..7c6a7abe42 100644
--- a/src/test/regress/sql/copy2.sql
+++ b/src/test/regress/sql/copy2.sql
@@ -70,6 +70,7 @@ COPY x from stdin (encoding 'sql_ascii', encoding 'sql_ascii');
-- incorrect options
COPY x to stdin (format BINARY, delimiter ',');
COPY x to stdin (format BINARY, null 'x');
+COPY x to stdin (format BINARY, ignore_datatype_errors);
COPY x to stdin (format TEXT, force_quote(a));
COPY x from stdin (format CSV, force_quote(a));
COPY x to stdout (format TEXT, force_not_null(a));
@@ -464,6 +465,30 @@ test1
SELECT * FROM instead_of_insert_tbl;
COMMIT;
+-- tests for IGNORE_DATATYPE_ERRORS option
+CREATE TABLE check_ign_err (n int, m int[], k int);
+COPY check_ign_err FROM STDIN WITH (IGNORE_DATATYPE_ERRORS);
+1 {1} 1
+a {2} 2
+3 {3} 3333333333
+4 {a, 4} 4
+
+5 {5} 5
+\.
+SELECT * FROM check_ign_err;
+
+
+-- test datatype error that can't be handled as soft: should fail
+CREATE TABLE hard_err(foo widget);
+COPY hard_err FROM STDIN WITH (IGNORE_DATATYPE_ERRORS);
+1
+\.
+
+-- test missing data: should fail
+COPY check_ign_err FROM STDIN WITH (IGNORE_DATATYPE_ERRORS);
+1 {1}
+\.
+
-- clean up
DROP TABLE forcetest;
DROP TABLE vistest;
@@ -478,6 +503,8 @@ DROP TABLE instead_of_insert_tbl;
DROP VIEW instead_of_insert_tbl_view;
DROP VIEW instead_of_insert_tbl_view_2;
DROP FUNCTION fun_instead_of_insert_tbl();
+DROP TABLE check_ign_err;
+DROP TABLE hard_err;
--
-- COPY FROM ... DEFAULT
--
2.39.2