On 2024-10-18 01:51, Fujii Masao wrote:
Thanks for your review and sorry for my late reply.

On 2024/10/17 22:45, torikoshia wrote:
Hi,

4ac2a9bec introduced reject_limit option to the COPY command, and I was wondering if it might be beneficial to add the same option to file_fdw.

Although there may be fewer practical use cases compared to COPY, it could still be useful in situations where the file being read via file_fdw is subject to modifications and there is a need to tolerate a limited number of errors.

Agreed.



What do you think?

I've attached a patch.

Thanks for the patch! Could you add it to the next CommitFest?

Added an entry for this patch:
https://commitfest.postgresql.org/50/5331/

+ALTER FOREIGN TABLE agg_bad OPTIONS (reject_limit '1');
+SELECT * FROM agg_bad;
+  a  |   b
+-----+--------
+ 100 | 99.097
+  42 | 324.78
+(2 rows)

Wouldn't it be better to include a test where a SELECT query fails, even with on_error set to "ignore," because the number of errors exceeds reject_limit?

Agreed.


+                       if (cstate->opts.reject_limit > 0 && \

The trailing backslash isn't needed here.
Agreed.



+  <varlistentry>
+   <term><literal>reject_limit</literal></term>

This entry should be placed right after the on_error option,
following the same order as in the COPY command documentation.


Based on the synopsis of the CREATE/ALTER FOREIGN TABLE commands, the value for the foreign table's option must be single-quoted. I’m not entirely sure if this is the correct approach, but in order to accommodate this, the patch modifies the value of reject_limit option to accept not only numeric values but also strings.


I don't have a better approach for this, so I'm okay with your solution.
Just one note: it would be helpful to explain and comment
why defGetCopyRejectLimitOption() accepts and parses both int64 and
string values.

Added a comment for this.


--
Regards,

--
Atsushi Torikoshi
Seconded from NTT DATA GROUP CORPORATION to SRA OSS K.K.
From 02b81f376d9b316a06d68c7bf714c6729100162c Mon Sep 17 00:00:00 2001
From: Atsushi Torikoshi <torikos...@oss.nttdata.com>
Date: Tue, 5 Nov 2024 21:57:16 +0900
Subject: [PATCH v2] file_fdw: Add reject_limit option to file_fdw

Commit 4ac2a9bece introduced REJECT_LIMIT option for COPY. This patch
extends support for this option to file_fdw.

As well as REJECT_LIMIT option for COPY, this option limits the
maximum number of erroneous rows that can be skipped.
If more rows encounter data type conversion errors than allowed by
reject_limit, the access to the file_fdw foreign table will fail
with an error, even when on_error = 'ignore'.

Based on the synopsis of the CREATE/ALTER FOREIGN TABLE commands, the
value for the foreign table's option must be single-quoted.
To accommodate this, the patch modifies defGetCopyRejectLimitOption
to accept not only int64 but also strings.

---
 contrib/file_fdw/data/agg.bad2         |  5 +++++
 contrib/file_fdw/expected/file_fdw.out | 15 ++++++++++++++-
 contrib/file_fdw/file_fdw.c            |  8 ++++++++
 contrib/file_fdw/sql/file_fdw.sql      |  7 ++++++-
 doc/src/sgml/file-fdw.sgml             | 12 ++++++++++++
 src/backend/commands/copy.c            | 15 ++++++++++++++-
 6 files changed, 59 insertions(+), 3 deletions(-)
 create mode 100644 contrib/file_fdw/data/agg.bad2

diff --git a/contrib/file_fdw/data/agg.bad2 b/contrib/file_fdw/data/agg.bad2
new file mode 100644
index 0000000000..04279ce55b
--- /dev/null
+++ b/contrib/file_fdw/data/agg.bad2
@@ -0,0 +1,5 @@
+56;@7.8@
+100;@99.097@
+0;@aaa@
+42;@324.78@
+1;@bbb@
diff --git a/contrib/file_fdw/expected/file_fdw.out b/contrib/file_fdw/expected/file_fdw.out
index 593fdc782e..dbfae52baf 100644
--- a/contrib/file_fdw/expected/file_fdw.out
+++ b/contrib/file_fdw/expected/file_fdw.out
@@ -206,7 +206,7 @@ SELECT * FROM agg_csv c JOIN agg_text t ON (t.a = c.a) ORDER BY c.a;
 SELECT * FROM agg_bad;               -- ERROR
 ERROR:  invalid input syntax for type real: "aaa"
 CONTEXT:  COPY agg_bad, line 3, column b: "aaa"
--- on_error and log_verbosity tests
+-- on_error, log_verbosity and reject_limit tests
 ALTER FOREIGN TABLE agg_bad OPTIONS (ADD on_error 'ignore');
 SELECT * FROM agg_bad;
 NOTICE:  1 row was skipped due to data type incompatibility
@@ -224,6 +224,19 @@ SELECT * FROM agg_bad;
   42 | 324.78
 (2 rows)
 
+\set filename :abs_srcdir '/data/agg.bad2'
+ALTER FOREIGN TABLE agg_bad OPTIONS (SET filename :'filename', ADD reject_limit '1');
+SELECT * FROM agg_bad;
+ERROR:  skipped more than REJECT_LIMIT (1) rows due to data type incompatibility
+CONTEXT:  COPY agg_bad, line 5, column b: "bbb"
+ALTER FOREIGN TABLE agg_bad OPTIONS (SET reject_limit '2');
+SELECT * FROM agg_bad;
+  a  |   b    
+-----+--------
+ 100 | 99.097
+  42 | 324.78
+(2 rows)
+
 ANALYZE agg_bad;
 -- misc query tests
 \t on
diff --git a/contrib/file_fdw/file_fdw.c b/contrib/file_fdw/file_fdw.c
index 043204c3e7..9e2896f32a 100644
--- a/contrib/file_fdw/file_fdw.c
+++ b/contrib/file_fdw/file_fdw.c
@@ -77,6 +77,7 @@ static const struct FileFdwOption valid_options[] = {
 	{"encoding", ForeignTableRelationId},
 	{"on_error", ForeignTableRelationId},
 	{"log_verbosity", ForeignTableRelationId},
+	{"reject_limit", ForeignTableRelationId},
 	{"force_not_null", AttributeRelationId},
 	{"force_null", AttributeRelationId},
 
@@ -788,6 +789,13 @@ retry:
 			 */
 			ResetPerTupleExprContext(estate);
 
+			if (cstate->opts.reject_limit > 0 &&
+				cstate->num_errors > cstate->opts.reject_limit)
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+						 errmsg("skipped more than REJECT_LIMIT (%lld) rows due to data type incompatibility",
+								(long long) cstate->opts.reject_limit)));
+
 			/* Repeat NextCopyFrom() until no soft error occurs */
 			goto retry;
 		}
diff --git a/contrib/file_fdw/sql/file_fdw.sql b/contrib/file_fdw/sql/file_fdw.sql
index edd77c5cd2..0208e095ae 100644
--- a/contrib/file_fdw/sql/file_fdw.sql
+++ b/contrib/file_fdw/sql/file_fdw.sql
@@ -150,11 +150,16 @@ SELECT * FROM agg_csv c JOIN agg_text t ON (t.a = c.a) ORDER BY c.a;
 -- error context report tests
 SELECT * FROM agg_bad;               -- ERROR
 
--- on_error and log_verbosity tests
+-- on_error, log_verbosity and reject_limit tests
 ALTER FOREIGN TABLE agg_bad OPTIONS (ADD on_error 'ignore');
 SELECT * FROM agg_bad;
 ALTER FOREIGN TABLE agg_bad OPTIONS (ADD log_verbosity 'silent');
 SELECT * FROM agg_bad;
+\set filename :abs_srcdir '/data/agg.bad2'
+ALTER FOREIGN TABLE agg_bad OPTIONS (SET filename :'filename', ADD reject_limit '1');
+SELECT * FROM agg_bad;
+ALTER FOREIGN TABLE agg_bad OPTIONS (SET reject_limit '2');
+SELECT * FROM agg_bad;
 ANALYZE agg_bad;
 
 -- misc query tests
diff --git a/doc/src/sgml/file-fdw.sgml b/doc/src/sgml/file-fdw.sgml
index bb3579b077..882d9a76d2 100644
--- a/doc/src/sgml/file-fdw.sgml
+++ b/doc/src/sgml/file-fdw.sgml
@@ -138,6 +138,18 @@
    </listitem>
   </varlistentry>
 
+  <varlistentry>
+   <term><literal>reject_limit</literal></term>
+
+   <listitem>
+    <para>
+     Specifies the maximum number of errors tolerated while converting a column's
+     input value to its data type, the same as <command>COPY</command>'s
+    <literal>REJECT_LIMIT</literal> option.
+    </para>
+   </listitem>
+  </varlistentry>
+
   <varlistentry>
    <term><literal>log_verbosity</literal></term>
 
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index 3485ba8663..c88f58742d 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -420,11 +420,24 @@ defGetCopyOnErrorChoice(DefElem *def, ParseState *pstate, bool is_from)
 
 /*
  * Extract REJECT_LIMIT value from a DefElem.
+ *
+ * Since foreign table options must be single-quoted values, this function
+ * accepts both int64 and string value.
  */
 static int64
 defGetCopyRejectLimitOption(DefElem *def)
 {
-	int64		reject_limit = defGetInt64(def);
+	int64		reject_limit;
+
+	if (def->arg == NULL)
+		ereport(ERROR,
+				(errcode(ERRCODE_SYNTAX_ERROR),
+				 errmsg("%s requires a numeric value",
+						def->defname)));
+	else if (nodeTag(def->arg) == T_String)
+		reject_limit = pg_strtoint64(strVal(def->arg));
+	else
+		reject_limit = defGetInt64(def);
 
 	if (reject_limit <= 0)
 		ereport(ERROR,
-- 
2.39.2

Reply via email to