On 2024-11-12 01:49, Fujii Masao wrote:
On 2024/11/11 21:45, torikoshia wrote:
Thanks for adding the comment. It clearly states that REJECT_LIMIT can be
a single-quoted string. However, it might also be helpful to mention that
it can be provided as an int64 in the COPY command option. How about
updating it like this?
------------------------------------
REJECT_LIMIT can be specified in two ways: as an int64 for the COPY command
option or as a single-quoted string for the foreign table option using
file_fdw. Therefore this function needs to handle both formats.
------------------------------------
Thanks! It seems better.
Attached v3 patch.
Thanks for updating the patch! It looks like you forgot to attach it, though.
Oops, thanks for pointing it out.
Here it is.
--
Regards,
--
Atsushi Torikoshi
Seconded from NTT DATA GROUP CORPORATION to SRA OSS K.K.
From 6380a1a52e125671cfecb3a9056019ff85a32dd7 Mon Sep 17 00:00:00 2001
From: Atsushi Torikoshi <torikos...@oss.nttdata.com>
Date: Mon, 11 Nov 2024 21:36:57 +0900
Subject: [PATCH v3] file_fdw: Add reject_limit option to file_fdw
Commit 4ac2a9bece introduced the REJECT_LIMIT option for COPY. This patch
extends support for this option to file_fdw.
As with the REJECT_LIMIT option for COPY, this option limits the
maximum number of erroneous rows that can be skipped.
If more rows encounter data type conversion errors than allowed by
reject_limit, the access to the file_fdw foreign table will fail
with an error, even when on_error = 'ignore'.
Based on the synopsis of the CREATE/ALTER FOREIGN TABLE commands, the
value for the foreign table's option must be single-quoted.
To accommodate this, the patch modifies defGetCopyRejectLimitOption
to accept not only int64 but also strings.
---
contrib/file_fdw/data/agg.bad | 1 +
contrib/file_fdw/expected/file_fdw.out | 16 ++++++++++++++--
contrib/file_fdw/file_fdw.c | 8 ++++++++
contrib/file_fdw/sql/file_fdw.sql | 6 +++++-
doc/src/sgml/file-fdw.sgml | 12 ++++++++++++
src/backend/commands/copy.c | 16 +++++++++++++++-
6 files changed, 55 insertions(+), 4 deletions(-)
diff --git a/contrib/file_fdw/data/agg.bad b/contrib/file_fdw/data/agg.bad
index 3415b15007..04279ce55b 100644
--- a/contrib/file_fdw/data/agg.bad
+++ b/contrib/file_fdw/data/agg.bad
@@ -2,3 +2,4 @@
100;@99.097@
0;@aaa@
42;@324.78@
+1;@bbb@
diff --git a/contrib/file_fdw/expected/file_fdw.out b/contrib/file_fdw/expected/file_fdw.out
index 593fdc782e..f52e82bfcf 100644
--- a/contrib/file_fdw/expected/file_fdw.out
+++ b/contrib/file_fdw/expected/file_fdw.out
@@ -206,10 +206,10 @@ SELECT * FROM agg_csv c JOIN agg_text t ON (t.a = c.a) ORDER BY c.a;
SELECT * FROM agg_bad; -- ERROR
ERROR: invalid input syntax for type real: "aaa"
CONTEXT: COPY agg_bad, line 3, column b: "aaa"
--- on_error and log_verbosity tests
+-- on_error, log_verbosity and reject_limit tests
ALTER FOREIGN TABLE agg_bad OPTIONS (ADD on_error 'ignore');
SELECT * FROM agg_bad;
-NOTICE: 1 row was skipped due to data type incompatibility
+NOTICE: 2 rows were skipped due to data type incompatibility
a | b
-----+--------
100 | 99.097
@@ -224,6 +224,18 @@ SELECT * FROM agg_bad;
42 | 324.78
(2 rows)
+ALTER FOREIGN TABLE agg_bad OPTIONS (ADD reject_limit '1'); -- ERROR
+SELECT * FROM agg_bad;
+ERROR: skipped more than REJECT_LIMIT (1) rows due to data type incompatibility
+CONTEXT: COPY agg_bad, line 5, column b: "bbb"
+ALTER FOREIGN TABLE agg_bad OPTIONS (SET reject_limit '2');
+SELECT * FROM agg_bad;
+ a | b
+-----+--------
+ 100 | 99.097
+ 42 | 324.78
+(2 rows)
+
ANALYZE agg_bad;
-- misc query tests
\t on
diff --git a/contrib/file_fdw/file_fdw.c b/contrib/file_fdw/file_fdw.c
index 043204c3e7..9e2896f32a 100644
--- a/contrib/file_fdw/file_fdw.c
+++ b/contrib/file_fdw/file_fdw.c
@@ -77,6 +77,7 @@ static const struct FileFdwOption valid_options[] = {
{"encoding", ForeignTableRelationId},
{"on_error", ForeignTableRelationId},
{"log_verbosity", ForeignTableRelationId},
+ {"reject_limit", ForeignTableRelationId},
{"force_not_null", AttributeRelationId},
{"force_null", AttributeRelationId},
@@ -788,6 +789,13 @@ retry:
*/
ResetPerTupleExprContext(estate);
+ if (cstate->opts.reject_limit > 0 &&
+ cstate->num_errors > cstate->opts.reject_limit)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("skipped more than REJECT_LIMIT (%lld) rows due to data type incompatibility",
+ (long long) cstate->opts.reject_limit)));
+
/* Repeat NextCopyFrom() until no soft error occurs */
goto retry;
}
diff --git a/contrib/file_fdw/sql/file_fdw.sql b/contrib/file_fdw/sql/file_fdw.sql
index edd77c5cd2..0d6920e1f6 100644
--- a/contrib/file_fdw/sql/file_fdw.sql
+++ b/contrib/file_fdw/sql/file_fdw.sql
@@ -150,11 +150,15 @@ SELECT * FROM agg_csv c JOIN agg_text t ON (t.a = c.a) ORDER BY c.a;
-- error context report tests
SELECT * FROM agg_bad; -- ERROR
--- on_error and log_verbosity tests
+-- on_error, log_verbosity and reject_limit tests
ALTER FOREIGN TABLE agg_bad OPTIONS (ADD on_error 'ignore');
SELECT * FROM agg_bad;
ALTER FOREIGN TABLE agg_bad OPTIONS (ADD log_verbosity 'silent');
SELECT * FROM agg_bad;
+ALTER FOREIGN TABLE agg_bad OPTIONS (ADD reject_limit '1'); -- ERROR
+SELECT * FROM agg_bad;
+ALTER FOREIGN TABLE agg_bad OPTIONS (SET reject_limit '2');
+SELECT * FROM agg_bad;
ANALYZE agg_bad;
-- misc query tests
diff --git a/doc/src/sgml/file-fdw.sgml b/doc/src/sgml/file-fdw.sgml
index bb3579b077..882d9a76d2 100644
--- a/doc/src/sgml/file-fdw.sgml
+++ b/doc/src/sgml/file-fdw.sgml
@@ -138,6 +138,18 @@
</listitem>
</varlistentry>
+ <varlistentry>
+ <term><literal>reject_limit</literal></term>
+
+ <listitem>
+ <para>
+ Specifies the maximum number of errors tolerated while converting a column's
+ input value to its data type, the same as <command>COPY</command>'s
+ <literal>REJECT_LIMIT</literal> option.
+ </para>
+ </listitem>
+ </varlistentry>
+
<varlistentry>
<term><literal>log_verbosity</literal></term>
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index 3485ba8663..2d98ecf3f4 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -420,11 +420,25 @@ defGetCopyOnErrorChoice(DefElem *def, ParseState *pstate, bool is_from)
/*
* Extract REJECT_LIMIT value from a DefElem.
+ *
+ * REJECT_LIMIT can be specified in two ways: as an int64 for the COPY command
+ * option or as a single-quoted string for the foreign table option using
+ * file_fdw. Therefore this function needs to handle both formats.
*/
static int64
defGetCopyRejectLimitOption(DefElem *def)
{
- int64 reject_limit = defGetInt64(def);
+ int64 reject_limit;
+
+ if (def->arg == NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("%s requires a numeric value",
+ def->defname)));
+ else if (nodeTag(def->arg) == T_String)
+ reject_limit = pg_strtoint64(strVal(def->arg));
+ else
+ reject_limit = defGetInt64(def);
if (reject_limit <= 0)
ereport(ERROR,
--
2.39.2