From 0d50d01253feeade771f4bc591ac6e61162d08a4 Mon Sep 17 00:00:00 2001
From: Shlok Kyal <shlok.kyal.oss@gmail.com>
Date: Tue, 5 Nov 2024 11:11:25 +0530
Subject: [PATCH v10] Disallow UPDATE/DELETE on table with generated column as
 REPLICA IDENTITY

UPDATE or DELETE operations on tables with unpublished generated columns
set as REPLICA IDENTITY are not permitted. This patch ensures that if an
UPDATE or DELETE command is executed on such tables, an error will be
thrown.
With this patch, the behavior has changed for the test added in commit
adedf54. Additionally, there is no other way to trigger the bug that was
fixed by commit adedf54, so the test has been removed.
---
 src/backend/commands/publicationcmds.c    | 78 +++++++++++++++++++++++
 src/backend/executor/execReplication.c    | 12 ++++
 src/backend/utils/cache/relcache.c        | 30 +++++++++
 src/include/catalog/pg_publication.h      |  7 ++
 src/include/commands/publicationcmds.h    |  2 +
 src/test/regress/expected/publication.out | 25 ++++++++
 src/test/regress/sql/publication.sql      | 26 ++++++++
 src/test/subscription/t/100_bugs.pl       | 16 +----
 8 files changed, 183 insertions(+), 13 deletions(-)

diff --git a/src/backend/commands/publicationcmds.c b/src/backend/commands/publicationcmds.c
index 0129db18c6..053877c524 100644
--- a/src/backend/commands/publicationcmds.c
+++ b/src/backend/commands/publicationcmds.c
@@ -444,6 +444,84 @@ pub_collist_contains_invalid_column(Oid pubid, Relation relation, List *ancestor
 	return result;
 }
 
+/*
+ * Check if REPLICA IDENTITY consists of any unpublished generated column.
+ *
+ * Returns true if any replica identity column is an unpublished generated
+ * column.
+ */
+bool
+replident_has_unpublished_gen_col(Oid pubid, Relation relation, List *ancestors,
+								  bool pubviaroot)
+{
+	Oid			relid = RelationGetRelid(relation);
+	Oid			publish_as_relid = RelationGetRelid(relation);
+	bool		result = false;
+	bool		found;
+	Publication *pub;
+
+	/* Return if the table does not contain any generated columns */
+	if (!relation->rd_att->constr ||
+		!relation->rd_att->constr->has_generated_stored)
+		return false;
+
+	/*
+	 * For a partition, if pubviaroot is true, find the topmost ancestor that
+	 * is published via this publication as we need to use its column list for
+	 * the changes.
+	 *
+	 * Note that even though the column list used is for an ancestor, the
+	 * REPLICA IDENTITY used will be for the actual child table.
+	 */
+	if (pubviaroot && relation->rd_rel->relispartition)
+	{
+		publish_as_relid = GetTopMostAncestorInPublication(pubid, ancestors, NULL);
+
+		if (!OidIsValid(publish_as_relid))
+			publish_as_relid = relid;
+	}
+
+	pub = GetPublication(pubid);
+	found = check_and_fetch_column_list(pub, publish_as_relid, NULL, NULL);
+
+	if (!found)
+	{
+		TupleDesc	desc = RelationGetDescr(relation);
+		Bitmapset  *idattrs;
+		int			x;
+
+		/*
+		 * REPLICA IDENTITY can be FULL only if there is no column list for
+		 * publication. If REPLICA IDENTITY is set as FULL and relation has a
+		 * generated column we should error out.
+		 */
+		if (relation->rd_rel->relreplident == REPLICA_IDENTITY_FULL)
+			return true;
+
+		/* Remember columns that are part of the REPLICA IDENTITY */
+		idattrs = RelationGetIndexAttrBitmap(relation,
+											 INDEX_ATTR_BITMAP_IDENTITY_KEY);
+
+		x = -1;
+		while ((x = bms_next_member(idattrs, x)) >= 0)
+		{
+			AttrNumber	attnum = (x + FirstLowInvalidHeapAttributeNumber);
+			Form_pg_attribute att = TupleDescAttr(desc, attnum - 1);
+
+			/* Check if generated column is part of REPLICA IDENTITY */
+			if (!att->attisdropped && att->attgenerated)
+			{
+				result = true;
+				break;
+			}
+		}
+
+		bms_free(idattrs);
+	}
+
+	return result;
+}
+
 /* check_functions_in_node callback */
 static bool
 contain_mutable_or_user_functions_checker(Oid func_id, void *context)
diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c
index 54025c9f15..57599df153 100644
--- a/src/backend/executor/execReplication.c
+++ b/src/backend/executor/execReplication.c
@@ -809,6 +809,12 @@ CheckCmdReplicaIdentity(Relation rel, CmdType cmd)
 				 errmsg("cannot update table \"%s\"",
 						RelationGetRelationName(rel)),
 				 errdetail("Column list used by the publication does not cover the replica identity.")));
+	else if (cmd == CMD_UPDATE && !pubdesc.replident_valid_for_update)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+				 errmsg("cannot update table \"%s\"",
+						RelationGetRelationName(rel)),
+				 errdetail("Replica identity consists of an unpublished generated column.")));
 	else if (cmd == CMD_DELETE && !pubdesc.rf_valid_for_delete)
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
@@ -821,6 +827,12 @@ CheckCmdReplicaIdentity(Relation rel, CmdType cmd)
 				 errmsg("cannot delete from table \"%s\"",
 						RelationGetRelationName(rel)),
 				 errdetail("Column list used by the publication does not cover the replica identity.")));
+	else if (cmd == CMD_DELETE && !pubdesc.replident_valid_for_delete)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+				 errmsg("cannot delete from table \"%s\"",
+						RelationGetRelationName(rel)),
+				 errdetail("Replica identity consists of an unpublished generated column.")));
 
 	/* If relation has replica identity we are always good. */
 	if (OidIsValid(RelationGetReplicaIndex(rel)))
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index 342467fd18..be8f8eea8f 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -5736,6 +5736,8 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc)
 		pubdesc->rf_valid_for_delete = true;
 		pubdesc->cols_valid_for_update = true;
 		pubdesc->cols_valid_for_delete = true;
+		pubdesc->replident_valid_for_update = true;
+		pubdesc->replident_valid_for_delete = true;
 		return;
 	}
 
@@ -5750,6 +5752,8 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc)
 	pubdesc->rf_valid_for_delete = true;
 	pubdesc->cols_valid_for_update = true;
 	pubdesc->cols_valid_for_delete = true;
+	pubdesc->replident_valid_for_update = true;
+	pubdesc->replident_valid_for_delete = true;
 
 	/* Fetch the publication membership info. */
 	puboids = GetRelationPublications(relid);
@@ -5827,6 +5831,21 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc)
 				pubdesc->cols_valid_for_delete = false;
 		}
 
+		/*
+		 * Check if all generated columns included in the REPLICA IDENTITY are
+		 * published.
+		 */
+		if (!pubform->pubgencols &&
+			(pubform->pubupdate || pubform->pubdelete) &&
+			replident_has_unpublished_gen_col(pubid, relation, ancestors,
+											  pubform->pubviaroot))
+		{
+			if (pubform->pubupdate)
+				pubdesc->replident_valid_for_update = false;
+			if (pubform->pubdelete)
+				pubdesc->replident_valid_for_delete = false;
+		}
+
 		ReleaseSysCache(tup);
 
 		/*
@@ -5848,6 +5867,17 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc)
 			pubdesc->pubactions.pubdelete && pubdesc->pubactions.pubtruncate &&
 			!pubdesc->cols_valid_for_update && !pubdesc->cols_valid_for_delete)
 			break;
+
+		/*
+		 * If we know everything is replicated and replica identity has an
+		 * unpublished generated column, there is no point to check for other
+		 * publications.
+		 */
+		if (pubdesc->pubactions.pubinsert && pubdesc->pubactions.pubupdate &&
+			pubdesc->pubactions.pubdelete && pubdesc->pubactions.pubtruncate &&
+			!pubdesc->replident_valid_for_update &&
+			!pubdesc->replident_valid_for_delete)
+			break;
 	}
 
 	if (relation->rd_pubdesc)
diff --git a/src/include/catalog/pg_publication.h b/src/include/catalog/pg_publication.h
index 9a83a72d6b..486f609a9a 100644
--- a/src/include/catalog/pg_publication.h
+++ b/src/include/catalog/pg_publication.h
@@ -98,6 +98,13 @@ typedef struct PublicationDesc
 	 */
 	bool		cols_valid_for_update;
 	bool		cols_valid_for_delete;
+
+	/*
+	 * true if all generated columns which are part of replica identity are
+	 * published or the publication actions do not include UPDATE or DELETE.
+	 */
+	bool		replident_valid_for_update;
+	bool		replident_valid_for_delete;
 } PublicationDesc;
 
 typedef struct Publication
diff --git a/src/include/commands/publicationcmds.h b/src/include/commands/publicationcmds.h
index 5487c571f6..b18e576b77 100644
--- a/src/include/commands/publicationcmds.h
+++ b/src/include/commands/publicationcmds.h
@@ -35,5 +35,7 @@ extern bool pub_rf_contains_invalid_column(Oid pubid, Relation relation,
 										   List *ancestors, bool pubviaroot);
 extern bool pub_collist_contains_invalid_column(Oid pubid, Relation relation,
 												List *ancestors, bool pubviaroot);
+extern bool replident_has_unpublished_gen_col(Oid pubid, Relation relation,
+											  List *ancestors, bool pubviaroot);
 
 #endif							/* PUBLICATIONCMDS_H */
diff --git a/src/test/regress/expected/publication.out b/src/test/regress/expected/publication.out
index 5de2d64d01..caf6e31554 100644
--- a/src/test/regress/expected/publication.out
+++ b/src/test/regress/expected/publication.out
@@ -672,6 +672,31 @@ DROP TABLE rf_tbl_abcd_pk;
 DROP TABLE rf_tbl_abcd_nopk;
 DROP TABLE rf_tbl_abcd_part_pk;
 -- ======================================================
+-- ======================================================
+-- test with generated column
+SET client_min_messages = 'ERROR';
+CREATE TABLE testpub_gencol (a INT, b INT GENERATED ALWAYS AS (a + 1) STORED NOT NULL);
+CREATE UNIQUE INDEX testpub_gencol_idx ON testpub_gencol (b);
+ALTER TABLE testpub_gencol REPLICA IDENTITY USING index testpub_gencol_idx;
+-- error - generated column "b" is not published but part of index set as REPLICA IDENTITY
+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+ERROR:  cannot update table "testpub_gencol"
+DETAIL:  Replica identity consists of an unpublished generated column.
+-- error - generated column "b" is not published and REPLICA IDENTITY is set FULL
+ALTER TABLE testpub_gencol REPLICA IDENTITY FULL;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+ERROR:  cannot update table "testpub_gencol"
+DETAIL:  Replica identity consists of an unpublished generated column.
+DROP PUBLICATION pub_gencol;
+-- ok - generated column "b" is published and is part of REPLICA IDENTITY
+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol with (publish_generated_columns = true);
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+DROP PUBLICATION pub_gencol;
+DROP INDEX testpub_gencol_idx;
+DROP TABLE testpub_gencol;
+RESET client_min_messages;
+-- ======================================================
 -- fail - duplicate tables are not allowed if that table has any column lists
 SET client_min_messages = 'ERROR';
 CREATE PUBLICATION testpub_dups FOR TABLE testpub_tbl1 (a), testpub_tbl1 WITH (publish = 'insert');
diff --git a/src/test/regress/sql/publication.sql b/src/test/regress/sql/publication.sql
index 48e68bcca2..3b1c661440 100644
--- a/src/test/regress/sql/publication.sql
+++ b/src/test/regress/sql/publication.sql
@@ -396,6 +396,32 @@ DROP TABLE rf_tbl_abcd_nopk;
 DROP TABLE rf_tbl_abcd_part_pk;
 -- ======================================================
 
+-- ======================================================
+-- test with generated column
+SET client_min_messages = 'ERROR';
+CREATE TABLE testpub_gencol (a INT, b INT GENERATED ALWAYS AS (a + 1) STORED NOT NULL);
+CREATE UNIQUE INDEX testpub_gencol_idx ON testpub_gencol (b);
+ALTER TABLE testpub_gencol REPLICA IDENTITY USING index testpub_gencol_idx;
+
+-- error - generated column "b" is not published but part of index set as REPLICA IDENTITY
+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+
+-- error - generated column "b" is not published and REPLICA IDENTITY is set FULL
+ALTER TABLE testpub_gencol REPLICA IDENTITY FULL;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+DROP PUBLICATION pub_gencol;
+
+-- ok - generated column "b" is published and is part of REPLICA IDENTITY
+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol with (publish_generated_columns = true);
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+DROP PUBLICATION pub_gencol;
+
+DROP INDEX testpub_gencol_idx;
+DROP TABLE testpub_gencol;
+RESET client_min_messages;
+-- ======================================================
+
 -- fail - duplicate tables are not allowed if that table has any column lists
 SET client_min_messages = 'ERROR';
 CREATE PUBLICATION testpub_dups FOR TABLE testpub_tbl1 (a), testpub_tbl1 WITH (publish = 'insert');
diff --git a/src/test/subscription/t/100_bugs.pl b/src/test/subscription/t/100_bugs.pl
index cb36ca7b16..794b928f50 100644
--- a/src/test/subscription/t/100_bugs.pl
+++ b/src/test/subscription/t/100_bugs.pl
@@ -377,8 +377,8 @@ $node_publisher->safe_psql('postgres', "DROP PUBLICATION tap_pub_sch");
 $node_publisher->stop('fast');
 $node_subscriber->stop('fast');
 
-# The bug was that when the REPLICA IDENTITY FULL is used with dropped or
-# generated columns, we fail to apply updates and deletes
+# The bug was that when the REPLICA IDENTITY FULL is used with dropped
+# we fail to apply updates and deletes
 $node_publisher->rotate_logfile();
 $node_publisher->start();
 
@@ -389,18 +389,14 @@ $node_publisher->safe_psql(
 	'postgres', qq(
 	CREATE TABLE dropped_cols (a int, b_drop int, c int);
 	ALTER TABLE dropped_cols REPLICA IDENTITY FULL;
-	CREATE TABLE generated_cols (a int, b_gen int GENERATED ALWAYS AS (5 * a) STORED, c int);
-	ALTER TABLE generated_cols REPLICA IDENTITY FULL;
-	CREATE PUBLICATION pub_dropped_cols FOR TABLE dropped_cols, generated_cols;
+	CREATE PUBLICATION pub_dropped_cols FOR TABLE dropped_cols;
 	-- some initial data
 	INSERT INTO dropped_cols VALUES (1, 1, 1);
-	INSERT INTO generated_cols (a, c) VALUES (1, 1);
 ));
 
 $node_subscriber->safe_psql(
 	'postgres', qq(
 	 CREATE TABLE dropped_cols (a int, b_drop int, c int);
-	 CREATE TABLE generated_cols (a int, b_gen int GENERATED ALWAYS AS (5 * a) STORED, c int);
 ));
 
 $publisher_connstr = $node_publisher->connstr . ' dbname=postgres';
@@ -421,7 +417,6 @@ $node_subscriber->safe_psql(
 $node_publisher->safe_psql(
 	'postgres', qq(
 		UPDATE dropped_cols SET a = 100;
-		UPDATE generated_cols SET a = 100;
 ));
 $node_publisher->wait_for_catchup('sub_dropped_cols');
 
@@ -430,11 +425,6 @@ is( $node_subscriber->safe_psql(
 	qq(1),
 	'replication with RI FULL and dropped columns');
 
-is( $node_subscriber->safe_psql(
-		'postgres', "SELECT count(*) FROM generated_cols WHERE a = 100"),
-	qq(1),
-	'replication with RI FULL and generated columns');
-
 $node_publisher->stop('fast');
 $node_subscriber->stop('fast');
 
-- 
2.34.1

