Hi,
Commit 132de9968840c introduced the SAVE_ERROR_TO option to COPY, which
makes it possible to skip malformed data, but there is no way to watch the
number of skipped rows during COPY.
The attached patch adds tuples_skipped to pg_stat_progress_copy, which
counts the number of tuples skipped because the source data is malformed.
If SAVE_ERROR_TO is not specified, this column remains zero.
The advantage would be that users can quickly notice and stop COPYing
when there is a larger amount of skipped data than expected, for
example.
As described in the commit log, more choices for SAVE_ERROR_TO such as
'log' are expected to be added, and using such options may also let us know
the number of skipped tuples during COPY, but exposing the count in
pg_stat_progress_copy would make it easier to monitor.
What do you think?
--
Regards,
--
Atsushi Torikoshi
NTT DATA Group Corporation
From 98e546ff2de380175708ce003f67c993299a3fb3 Mon Sep 17 00:00:00 2001
From: Atsushi Torikoshi <torikos...@oss.nttdata.com>
Date: Wed, 17 Jan 2024 13:41:44 +0900
Subject: [PATCH v1] Add tuples_skipped to pg_stat_progress_copy
132de9968840c enabled COPY to skip malformed data, but there is no way to watch the number of skipped rows during COPY.
This patch adds tuples_skipped to pg_stat_progress_copy, which counts the number of tuples skipped because the source data is malformed.
If SAVE_ERROR_TO is not specified, this column remains zero.
Needs catalog bump.
---
doc/src/sgml/monitoring.sgml | 10 ++++++++++
src/backend/catalog/system_views.sql | 3 ++-
src/backend/commands/copyfrom.c | 5 +++++
src/include/commands/progress.h | 1 +
src/test/regress/expected/rules.out | 3 ++-
5 files changed, 20 insertions(+), 2 deletions(-)
diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index b804eb8b5e..96ed774670 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -5779,6 +5779,16 @@ FROM pg_stat_get_backend_idset() AS backendid;
<command>WHERE</command> clause of the <command>COPY</command> command.
</para></entry>
</row>
+
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>tuples_skipped</structfield> <type>bigint</type>
+ </para>
+ <para>
+ Number of tuples skipped because they contain malformed data
+ (if <literal>SAVE_ERROR_TO</literal> is specified, otherwise zero).
+ </para></entry>
+ </row>
</tbody>
</tgroup>
</table>
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index e43e36f5ac..6288270e2b 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -1318,7 +1318,8 @@ CREATE VIEW pg_stat_progress_copy AS
S.param1 AS bytes_processed,
S.param2 AS bytes_total,
S.param3 AS tuples_processed,
- S.param4 AS tuples_excluded
+ S.param4 AS tuples_excluded,
+ S.param7 AS tuples_skipped
FROM pg_stat_get_progress_info('COPY') AS S
LEFT JOIN pg_database D ON S.datid = D.oid;
diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c
index 4058b08134..fe33b0facf 100644
--- a/src/backend/commands/copyfrom.c
+++ b/src/backend/commands/copyfrom.c
@@ -650,6 +650,7 @@ CopyFrom(CopyFromState cstate)
CopyMultiInsertInfo multiInsertInfo = {0}; /* pacify compiler */
int64 processed = 0;
int64 excluded = 0;
+ int64 skipped = 0;
bool has_before_insert_row_trig;
bool has_instead_insert_row_trig;
bool leafpart_use_multi_insert = false;
@@ -1012,6 +1013,10 @@ CopyFrom(CopyFromState cstate)
*/
cstate->escontext->error_occurred = false;
+ /* Report that this tuple was skipped by the SAVE_ERROR_TO clause */
+ pgstat_progress_update_param(PROGRESS_COPY_TUPLES_SKIPPED,
+ ++skipped);
+
continue;
}
diff --git a/src/include/commands/progress.h b/src/include/commands/progress.h
index a458c8c50a..73afa77a9c 100644
--- a/src/include/commands/progress.h
+++ b/src/include/commands/progress.h
@@ -142,6 +142,7 @@
#define PROGRESS_COPY_TUPLES_EXCLUDED 3
#define PROGRESS_COPY_COMMAND 4
#define PROGRESS_COPY_TYPE 5
+#define PROGRESS_COPY_TUPLES_SKIPPED 6
/* Commands of COPY (as advertised via PROGRESS_COPY_COMMAND) */
#define PROGRESS_COPY_COMMAND_FROM 1
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index 55f2e95352..5e846b01e6 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -1988,7 +1988,8 @@ pg_stat_progress_copy| SELECT s.pid,
s.param1 AS bytes_processed,
s.param2 AS bytes_total,
s.param3 AS tuples_processed,
- s.param4 AS tuples_excluded
+ s.param4 AS tuples_excluded,
+ s.param7 AS tuples_skipped
FROM (pg_stat_get_progress_info('COPY'::text) s(pid, datid, relid, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10, param11, param12, param13, param14, param15, param16, param17, param18, param19, param20)
LEFT JOIN pg_database d ON ((s.datid = d.oid)));
pg_stat_progress_create_index| SELECT s.pid,
base-commit: 65c5864d7fac46516f17ee89085e349a87ee5bd7
--
2.39.2