Hi,

Commit 132de9968840c introduced the SAVE_ERROR_TO option to COPY, which makes it possible to skip malformed data, but there is currently no way to monitor the number of skipped rows while COPY is running.

The attached patch adds a tuples_skipped column to pg_stat_progress_copy, which counts the number of tuples skipped because the source data is malformed.
If SAVE_ERROR_TO is not specified, this column remains zero.

One advantage, for example, is that users can quickly notice and stop the COPY when more data is being skipped than expected.

As described in the commit log, more choices for SAVE_ERROR_TO, such as 'log', are expected to be added, and using such options may let us know the number of skipped tuples during COPY. However, exposing the count in pg_stat_progress_copy would make it easier to monitor.


What do you think?

--
Regards,

--
Atsushi Torikoshi
NTT DATA Group Corporation
From 98e546ff2de380175708ce003f67c993299a3fb3 Mon Sep 17 00:00:00 2001
From: Atsushi Torikoshi <torikos...@oss.nttdata.com>
Date: Wed, 17 Jan 2024 13:41:44 +0900
Subject: [PATCH v1] Add tuples_skipped to pg_stat_progress_copy

Commit 132de9968840c enabled COPY to skip malformed data, but there is no way to monitor the number of skipped rows while COPY is running.

This patch adds a tuples_skipped column to pg_stat_progress_copy, which counts the number of tuples skipped because the source data is malformed.
If SAVE_ERROR_TO is not specified, this column remains zero.

Needs catalog bump.
---
 doc/src/sgml/monitoring.sgml         | 10 ++++++++++
 src/backend/catalog/system_views.sql |  3 ++-
 src/backend/commands/copyfrom.c      |  5 +++++
 src/include/commands/progress.h      |  1 +
 src/test/regress/expected/rules.out  |  3 ++-
 5 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index b804eb8b5e..96ed774670 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -5779,6 +5779,16 @@ FROM pg_stat_get_backend_idset() AS backendid;
        <command>WHERE</command> clause of the <command>COPY</command> command.
       </para></entry>
      </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>tuples_skipped</structfield> <type>bigint</type>
+      </para>
+      <para>
+       Number of tuples skipped because they contain malformed data
+       (if <literal>SAVE_ERROR_TO</literal> is specified, otherwise zero).
+      </para></entry>
+     </row>
     </tbody>
    </tgroup>
   </table>
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index e43e36f5ac..6288270e2b 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -1318,7 +1318,8 @@ CREATE VIEW pg_stat_progress_copy AS
         S.param1 AS bytes_processed,
         S.param2 AS bytes_total,
         S.param3 AS tuples_processed,
-        S.param4 AS tuples_excluded
+        S.param4 AS tuples_excluded,
+        S.param7 AS tuples_skipped
     FROM pg_stat_get_progress_info('COPY') AS S
         LEFT JOIN pg_database D ON S.datid = D.oid;
 
diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c
index 4058b08134..fe33b0facf 100644
--- a/src/backend/commands/copyfrom.c
+++ b/src/backend/commands/copyfrom.c
@@ -650,6 +650,7 @@ CopyFrom(CopyFromState cstate)
 	CopyMultiInsertInfo multiInsertInfo = {0};	/* pacify compiler */
 	int64		processed = 0;
 	int64		excluded = 0;
+	int64		skipped = 0;
 	bool		has_before_insert_row_trig;
 	bool		has_instead_insert_row_trig;
 	bool		leafpart_use_multi_insert = false;
@@ -1012,6 +1013,10 @@ CopyFrom(CopyFromState cstate)
 				 */
 				cstate->escontext->error_occurred = false;
 
+			/* Report that this tuple was skipped by the SAVE_ERROR_TO clause */
+			pgstat_progress_update_param(PROGRESS_COPY_TUPLES_SKIPPED,
+											 ++skipped);
+
 			continue;
 		}
 
diff --git a/src/include/commands/progress.h b/src/include/commands/progress.h
index a458c8c50a..73afa77a9c 100644
--- a/src/include/commands/progress.h
+++ b/src/include/commands/progress.h
@@ -142,6 +142,7 @@
 #define PROGRESS_COPY_TUPLES_EXCLUDED 3
 #define PROGRESS_COPY_COMMAND 4
 #define PROGRESS_COPY_TYPE 5
+#define PROGRESS_COPY_TUPLES_SKIPPED 6
 
 /* Commands of COPY (as advertised via PROGRESS_COPY_COMMAND) */
 #define PROGRESS_COPY_COMMAND_FROM 1
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index 55f2e95352..5e846b01e6 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -1988,7 +1988,8 @@ pg_stat_progress_copy| SELECT s.pid,
     s.param1 AS bytes_processed,
     s.param2 AS bytes_total,
     s.param3 AS tuples_processed,
-    s.param4 AS tuples_excluded
+    s.param4 AS tuples_excluded,
+    s.param7 AS tuples_skipped
    FROM (pg_stat_get_progress_info('COPY'::text) s(pid, datid, relid, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10, param11, param12, param13, param14, param15, param16, param17, param18, param19, param20)
      LEFT JOIN pg_database d ON ((s.datid = d.oid)));
 pg_stat_progress_create_index| SELECT s.pid,

base-commit: 65c5864d7fac46516f17ee89085e349a87ee5bd7
-- 
2.39.2

Reply via email to