On Mon, Sep 9, 2024 at 10:34 PM Jim Jones <jim.jo...@uni-muenster.de> wrote:
>
>
> Hi there
>
> On 26.08.24 02:00, jian he wrote:
> > hi all.
> > patch updated.
> > simplified the code a lot.
> >
> > idea is same:
> > COPY t_on_error_null FROM STDIN WITH (on_error set_to_null);
> >
> > If the STDIN number of columns is the same as the target table, then
> > InputFunctionCallSafe
> > call failure will make that column values be null.
> >
> >
> > If the STDIN number of columns is not the same as the target table, then 
> > error
> > ERROR:  missing data for column \"%s\"
> > ERROR:  extra data after last expected column
> > which is status quo.
>
> I wanted to give it another try, but the patch does not apply ...
>

here we are.
please check the attached file.
From 3d6b3d8b0393b5bc4950e85c40c69c0da46c8035 Mon Sep 17 00:00:00 2001
From: jian he <jian.universal...@gmail.com>
Date: Thu, 12 Sep 2024 17:07:02 +0800
Subject: [PATCH v4 1/1] on_error set_to_null

extent "on_error action", introduce new option: on_error set_to_null.

Due to current grammar, we cannot use "on_error null",
so I choose on_error set_to_null.

any data type conversion errors while the COPY FROM process will set that column value to be NULL.
this will only work with COPY FROM and non-binary format.

However this will respect the not-null constraint, meaning, if you actually converted error to null,
but the column has not-null constraint, not-null constraint violation ERROR will be raised.

discussion: https://postgr.es/m/CAKFQuwawy1e6YR4S=j+y7pXqg_Dw1WBVrgvf=bp3d1_asfe...@mail.gmail.com
---
 doc/src/sgml/ref/copy.sgml           |  1 +
 src/backend/commands/copy.c          |  4 ++-
 src/backend/commands/copyfrom.c      |  9 ++---
 src/backend/commands/copyfromparse.c | 11 +++++++
 src/include/commands/copy.h          |  1 +
 src/test/regress/expected/copy2.out  | 49 ++++++++++++++++++++++++++++
 src/test/regress/sql/copy2.sql       | 44 +++++++++++++++++++++++++
 7 files changed, 114 insertions(+), 5 deletions(-)

diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml
index 1518af8a04..b6bdf45e7e 100644
--- a/doc/src/sgml/ref/copy.sgml
+++ b/doc/src/sgml/ref/copy.sgml
@@ -394,6 +394,7 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
       An <replaceable class="parameter">error_action</replaceable> value of
       <literal>stop</literal> means fail the command, while
       <literal>ignore</literal> means discard the input row and continue with the next one.
+      <literal>set_to_null</literal> means the input value will set to <literal>null</literal> and continue with the next one.
       The default is <literal>stop</literal>.
      </para>
      <para>
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index 3bb579a3a4..05b152a090 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -403,12 +403,14 @@ defGetCopyOnErrorChoice(DefElem *def, ParseState *pstate, bool is_from)
 				 parser_errposition(pstate, def->location)));
 
 	/*
-	 * Allow "stop", or "ignore" values.
+	 * Allow "stop", or "ignore", "set_to_null" values.
 	 */
 	if (pg_strcasecmp(sval, "stop") == 0)
 		return COPY_ON_ERROR_STOP;
 	if (pg_strcasecmp(sval, "ignore") == 0)
 		return COPY_ON_ERROR_IGNORE;
+	if (pg_strcasecmp(sval, "set_to_null") == 0)
+		return COPY_ON_ERROR_NULL;
 
 	ereport(ERROR,
 			(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c
index 2d3462913e..1669fac444 100644
--- a/src/backend/commands/copyfrom.c
+++ b/src/backend/commands/copyfrom.c
@@ -1319,7 +1319,7 @@ CopyFrom(CopyFromState cstate)
 	/* Done, clean up */
 	error_context_stack = errcallback.previous;
 
-	if (cstate->opts.on_error != COPY_ON_ERROR_STOP &&
+	if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE &&
 		cstate->num_errors > 0)
 		ereport(NOTICE,
 				errmsg_plural("%llu row was skipped due to data type incompatibility",
@@ -1471,10 +1471,11 @@ BeginCopyFrom(ParseState *pstate,
 		cstate->escontext->error_occurred = false;
 
 		/*
-		 * Currently we only support COPY_ON_ERROR_IGNORE. We'll add other
-		 * options later
+		 * Currently we only support COPY_ON_ERROR_IGNORE, COPY_ON_ERROR_NULL.
+		 * We'll add other options later
 		 */
-		if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE)
+		if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE ||
+			cstate->opts.on_error == COPY_ON_ERROR_NULL)
 			cstate->escontext->details_wanted = false;
 	}
 	else
diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c
index 97a4c387a3..3fe32b76ac 100644
--- a/src/backend/commands/copyfromparse.c
+++ b/src/backend/commands/copyfromparse.c
@@ -969,6 +969,17 @@ NextCopyFrom(CopyFromState cstate, ExprContext *econtext,
 			{
 				Assert(cstate->opts.on_error != COPY_ON_ERROR_STOP);
 
+				if (cstate->opts.on_error == COPY_ON_ERROR_NULL)
+				{
+					values[m] = (Datum) 0;
+					nulls[m] = true;
+					/*
+					 * set error_occurred to false, so next
+					 * InputFunctionCallSafe call behave sane.
+					*/
+					cstate->escontext->error_occurred = false;
+					continue;
+				}
 				cstate->num_errors++;
 
 				if (cstate->opts.log_verbosity == COPY_LOG_VERBOSITY_VERBOSE)
diff --git a/src/include/commands/copy.h b/src/include/commands/copy.h
index 141fd48dc1..fa87232ed7 100644
--- a/src/include/commands/copy.h
+++ b/src/include/commands/copy.h
@@ -38,6 +38,7 @@ typedef enum CopyOnErrorChoice
 {
 	COPY_ON_ERROR_STOP = 0,		/* immediately throw errors, default */
 	COPY_ON_ERROR_IGNORE,		/* ignore errors */
+	COPY_ON_ERROR_NULL,			/* set error field to null */
 } CopyOnErrorChoice;
 
 /*
diff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out
index 61a19cdc4c..b92a5771ff 100644
--- a/src/test/regress/expected/copy2.out
+++ b/src/test/regress/expected/copy2.out
@@ -81,6 +81,10 @@ COPY x from stdin (on_error ignore, on_error ignore);
 ERROR:  conflicting or redundant options
 LINE 1: COPY x from stdin (on_error ignore, on_error ignore);
                                             ^
+COPY x from stdin (on_error set_to_null, on_error set_to_null);
+ERROR:  conflicting or redundant options
+LINE 1: COPY x from stdin (on_error set_to_null, on_error set_to_nul...
+                                                 ^
 COPY x from stdin (log_verbosity default, log_verbosity verbose);
 ERROR:  conflicting or redundant options
 LINE 1: COPY x from stdin (log_verbosity default, log_verbosity verb...
@@ -92,6 +96,8 @@ COPY x to stdin (format BINARY, null 'x');
 ERROR:  cannot specify NULL in BINARY mode
 COPY x from stdin (format BINARY, on_error ignore);
 ERROR:  only ON_ERROR STOP is allowed in BINARY mode
+COPY x from stdin (format BINARY, on_error set_to_null);
+ERROR:  only ON_ERROR STOP is allowed in BINARY mode
 COPY x from stdin (on_error unsupported);
 ERROR:  COPY ON_ERROR "unsupported" not recognized
 LINE 1: COPY x from stdin (on_error unsupported);
@@ -112,6 +118,10 @@ COPY x to stdin (format BINARY, on_error unsupported);
 ERROR:  COPY ON_ERROR cannot be used with COPY TO
 LINE 1: COPY x to stdin (format BINARY, on_error unsupported);
                                         ^
+COPY x to stdin (on_error set_to_null);
+ERROR:  COPY ON_ERROR cannot be used with COPY TO
+LINE 1: COPY x to stdin (on_error set_to_null);
+                         ^
 COPY x to stdout (log_verbosity unsupported);
 ERROR:  COPY LOG_VERBOSITY "unsupported" not recognized
 LINE 1: COPY x to stdout (log_verbosity unsupported);
@@ -753,6 +763,24 @@ CONTEXT:  COPY check_ign_err
 NOTICE:  skipping row due to data type incompatibility at line 8 for column "k": "a"
 CONTEXT:  COPY check_ign_err
 NOTICE:  6 rows were skipped due to data type incompatibility
+create domain d_int_not_null as int not null check(value > 0);
+CREATE TABLE t_on_error_null (a d_int_not_null, c int not null, b int);
+--fail
+COPY t_on_error_null FROM STDIN WITH (on_error set_to_null);
+ERROR:  null value in column "c" of relation "t_on_error_null" violates not-null constraint
+DETAIL:  Failing row contains (11, null, 12).
+CONTEXT:  COPY t_on_error_null, line 1: "11	a	12"
+--ok
+COPY t_on_error_null FROM STDIN WITH (on_error set_to_null);
+select * from t_on_error_null;
+ a | c  | b  
+---+----+----
+   | 11 | 13
+   | 11 | 14
+(2 rows)
+
+drop table t_on_error_null;
+drop domain d_int_not_null;
 -- tests for on_error option with log_verbosity and null constraint via domain
 CREATE DOMAIN dcheck_ign_err2 varchar(15) NOT NULL;
 CREATE TABLE check_ign_err2 (n int, m int[], k int, l dcheck_ign_err2);
@@ -789,6 +817,27 @@ CONTEXT:  COPY check_ign_err, line 1: "1	{1}"
 COPY check_ign_err FROM STDIN WITH (on_error ignore);
 ERROR:  extra data after last expected column
 CONTEXT:  COPY check_ign_err, line 1: "1	{1}	3	abc"
+truncate check_ign_err;
+COPY check_ign_err FROM STDIN WITH (on_error set_to_null);
+\pset null NULL
+SELECT * FROM check_ign_err;
+  n   |  m  |  k   
+------+-----+------
+    1 | {1} | NULL
+    2 | {2} |    1
+    3 | {3} |    2
+    4 | {4} | NULL
+ NULL | {5} | NULL
+(5 rows)
+
+--should fail.
+COPY check_ign_err FROM STDIN WITH (delimiter ',', on_error set_to_null);
+ERROR:  missing data for column "k"
+CONTEXT:  COPY check_ign_err, line 1, column m: ""
+--should fail.
+COPY check_ign_err FROM STDIN WITH (delimiter ',', on_error set_to_null);
+ERROR:  extra data after last expected column
+CONTEXT:  COPY check_ign_err, line 1: "1,{1},1,1"
 -- clean up
 DROP TABLE forcetest;
 DROP TABLE vistest;
diff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql
index 8b14962194..1144822768 100644
--- a/src/test/regress/sql/copy2.sql
+++ b/src/test/regress/sql/copy2.sql
@@ -67,12 +67,14 @@ COPY x from stdin (force_null (a), force_null (b));
 COPY x from stdin (convert_selectively (a), convert_selectively (b));
 COPY x from stdin (encoding 'sql_ascii', encoding 'sql_ascii');
 COPY x from stdin (on_error ignore, on_error ignore);
+COPY x from stdin (on_error set_to_null, on_error set_to_null);
 COPY x from stdin (log_verbosity default, log_verbosity verbose);
 
 -- incorrect options
 COPY x to stdin (format BINARY, delimiter ',');
 COPY x to stdin (format BINARY, null 'x');
 COPY x from stdin (format BINARY, on_error ignore);
+COPY x from stdin (format BINARY, on_error set_to_null);
 COPY x from stdin (on_error unsupported);
 COPY x to stdin (format TEXT, force_quote(a));
 COPY x from stdin (format CSV, force_quote(a));
@@ -81,6 +83,7 @@ COPY x to stdin (format CSV, force_not_null(a));
 COPY x to stdout (format TEXT, force_null(a));
 COPY x to stdin (format CSV, force_null(a));
 COPY x to stdin (format BINARY, on_error unsupported);
+COPY x to stdin (on_error set_to_null);
 COPY x to stdout (log_verbosity unsupported);
 
 -- too many columns in column list: should fail
@@ -526,6 +529,24 @@ a	{2}	2
 8	{8}	8
 \.
 
+create domain d_int_not_null as int not null check(value > 0);
+CREATE TABLE t_on_error_null (a d_int_not_null, c int not null, b int);
+
+--fail
+COPY t_on_error_null FROM STDIN WITH (on_error set_to_null);
+11	a	12
+\.
+
+--ok
+COPY t_on_error_null FROM STDIN WITH (on_error set_to_null);
+-1	11	13
+a	11	14
+\.
+
+select * from t_on_error_null;
+drop table t_on_error_null;
+drop domain d_int_not_null;
+
 -- tests for on_error option with log_verbosity and null constraint via domain
 CREATE DOMAIN dcheck_ign_err2 varchar(15) NOT NULL;
 CREATE TABLE check_ign_err2 (n int, m int[], k int, l dcheck_ign_err2);
@@ -557,6 +578,29 @@ COPY check_ign_err FROM STDIN WITH (on_error ignore);
 1	{1}	3	abc
 \.
 
+
+truncate check_ign_err;
+COPY check_ign_err FROM STDIN WITH (on_error set_to_null);
+1	{1}	a
+2	{2}	1
+3	{3}	2
+4	{4}	b
+a	{5}	c
+\.
+
+\pset null NULL
+SELECT * FROM check_ign_err;
+
+--should fail.
+COPY check_ign_err FROM STDIN WITH (delimiter ',', on_error set_to_null);
+1,
+\.
+
+--should fail.
+COPY check_ign_err FROM STDIN WITH (delimiter ',', on_error set_to_null);
+1,{1},1,1
+\.
+
 -- clean up
 DROP TABLE forcetest;
 DROP TABLE vistest;

base-commit: 00c76cf21c42c17e60e73a87dea0d1b4e234d9da
-- 
2.34.1

Reply via email to