Hi, hackers
ACL lists in tables may potentially be large in size. I suggest adding TOAST
support for system tables, namely pg_class, pg_attribute and
pg_largeobject_metadata, for they include ACL columns.
In commit 96cdeae0 these tables were missed because of possible conflicts with
VACUUM FULL and problem with seeing a non-empty new cluster by pg_upgrade.
Given patch fixes both expected bugs. Also patch adds a new regress test for
checking TOAST properly working with ACL attributes. Suggested feature has been
used in Postgres Pro Enterprise for a long time, and it helps with large ACL's.
The VACUUM FULL bug is detailed here:
http://postgr.es/m/CAPpHfdu3PJUzHpQrvJ5RC9bEX_bZ6LwX52kBpb0EiD_9e3Np5g%40mail.gmail.com
Looking forward to your comments
--
Sofia Kopikova
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company
From 0a99d94a1ec83d0621bb9805e87d8953a0ba4000 Mon Sep 17 00:00:00 2001
From: Sofia Kopikova <s.kopik...@postgrespro.ru>
Date: Tue, 25 Jan 2022 14:44:14 +0300
Subject: [PATCH] Add TOAST support for several system tables
ACL lists may have large size. Using TOAST for system tables pg_class,
pg_attribute and pg_largeobject_metadata with ACL columns.
Bugs with conflicting VACUUM FULL and TOAST and pg_upgrade seeing
a non-empty new cluster are fixed.
---
src/backend/access/heap/heapam.c | 67 +++++++++++++++++--
src/backend/access/heap/pruneheap.c | 4 ++
src/backend/catalog/catalog.c | 2 +
src/backend/commands/cluster.c | 66 +++++++++++-------
src/bin/pg_upgrade/check.c | 3 +-
src/include/catalog/pg_attribute.h | 2 +
src/include/catalog/pg_class.h | 2 +
src/include/catalog/pg_largeobject_metadata.h | 2 +
src/include/commands/cluster.h | 1 +
src/test/regress/expected/misc_sanity.out | 27 +++-----
.../regress/expected/pg_catalog_toast1.out | 25 +++++++
src/test/regress/parallel_schedule | 3 +
src/test/regress/sql/misc_sanity.sql | 7 +-
src/test/regress/sql/pg_catalog_toast1.sql | 20 ++++++
14 files changed, 178 insertions(+), 53 deletions(-)
create mode 100644 src/test/regress/expected/pg_catalog_toast1.out
create mode 100644 src/test/regress/sql/pg_catalog_toast1.sql
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 6ec57f3d8b2..e49bf3fcb53 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -5958,6 +5958,53 @@ heap_abort_speculative(Relation relation, ItemPointer tid)
pgstat_count_heap_delete(relation);
}
+/*
+ * Prepare tuple for inplace update. TOASTed attributes can't be modified
+ * by in-place upgrade. Simultaneously, new tuple is flatten. So, we just
+ * replace TOASTed attributes with their values of old tuple.
+ */
+static HeapTuple
+heap_inplace_update_prepare_tuple(Relation relation,
+ HeapTuple oldtup,
+ HeapTuple newtup)
+{
+ TupleDesc desc = relation->rd_att;
+ HeapTuple result;
+ Datum *oldvals,
+ *newvals;
+ bool *oldnulls,
+ *newnulls;
+ int i,
+ natts = desc->natts;
+
+ oldvals = (Datum *) palloc(sizeof(Datum) * natts);
+ newvals = (Datum *) palloc(sizeof(Datum) * natts);
+ oldnulls = (bool *) palloc(sizeof(bool) * natts);
+ newnulls = (bool *) palloc(sizeof(bool) * natts);
+
+ heap_deform_tuple(oldtup, desc, oldvals, oldnulls);
+ heap_deform_tuple(newtup, desc, newvals, newnulls);
+
+ for (i = 0; i < natts; i++)
+ {
+ Form_pg_attribute att = &desc->attrs[i];
+
+ if (att->attlen == -1 &&
+ !oldnulls[i] &&
+ VARATT_IS_EXTENDED(oldvals[i]))
+ {
+ Assert(!newnulls[i]);
+ newvals[i] = oldvals[i];
+ }
+ }
+
+ result = heap_form_tuple(desc, newvals, newnulls);
+
+ result->t_self = newtup->t_self;
+
+ return result;
+}
+
/*
* heap_inplace_update - update a tuple "in place" (ie, overwrite it)
*
@@ -5987,6 +6034,8 @@ heap_inplace_update(Relation relation, HeapTuple tuple)
HeapTupleHeader htup;
uint32 oldlen;
uint32 newlen;
+ HeapTupleData oldtup;
+ HeapTuple newtup;
/*
* For now, we don't allow parallel updates. Unlike a regular update,
@@ -6012,16 +6061,26 @@ heap_inplace_update(Relation relation, HeapTuple tuple)
htup = (HeapTupleHeader) PageGetItem(page, lp);
+ oldtup.t_tableOid = RelationGetRelid(relation);
+ oldtup.t_data = htup;
+ oldtup.t_len = ItemIdGetLength(lp);
+ oldtup.t_self = tuple->t_self;
+#ifdef XID_IS_64BIT
+ HeapTupleCopyBaseFromPage(&oldtup, page);
+#endif
+
+ newtup = heap_inplace_update_prepare_tuple(relation, &oldtup, tuple);
+
oldlen = ItemIdGetLength(lp) - htup->t_hoff;
- newlen = tuple->t_len - tuple->t_data->t_hoff;
- if (oldlen != newlen || htup->t_hoff != tuple->t_data->t_hoff)
+ newlen = newtup->t_len - newtup->t_data->t_hoff;
+ if (oldlen != newlen || htup->t_hoff != newtup->t_data->t_hoff)
elog(ERROR, "wrong tuple length");
/* NO EREPORT(ERROR) from here till changes are logged */
START_CRIT_SECTION();
memcpy((char *) htup + htup->t_hoff,
- (char *) tuple->t_data + tuple->t_data->t_hoff,
+ (char *) newtup->t_data + newtup->t_data->t_hoff,
newlen);
MarkBufferDirty(buffer);
@@ -6032,7 +6091,7 @@ heap_inplace_update(Relation relation, HeapTuple tuple)
xl_heap_inplace xlrec;
XLogRecPtr recptr;
- xlrec.offnum = ItemPointerGetOffsetNumber(&tuple->t_self);
+ xlrec.offnum = ItemPointerGetOffsetNumber(&newtup->t_self);
XLogBeginInsert();
XLogRegisterData((char *) &xlrec, SizeOfHeapInplace);
diff --git a/src/backend/access/heap/pruneheap.c b/src/backend/access/heap/pruneheap.c
index b3e2eec52fa..bd15b03e97a 100644
--- a/src/backend/access/heap/pruneheap.c
+++ b/src/backend/access/heap/pruneheap.c
@@ -20,6 +20,7 @@
#include "access/transam.h"
#include "access/xlog.h"
#include "catalog/catalog.h"
+#include "commands/cluster.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "storage/bufmgr.h"
@@ -121,6 +122,9 @@ heap_page_prune_opt(Relation relation, Buffer buffer)
if (RecoveryInProgress())
return;
+ if (is_inside_rebuild_relation())
+ return;
+
/*
* XXX: Magic to keep old_snapshot_threshold tests appear "working". They
* currently are broken, and discussion of what to do about them is
diff --git a/src/backend/catalog/catalog.c b/src/backend/catalog/catalog.c
index dfd5fb669ee..eb88b04ebc6 100644
--- a/src/backend/catalog/catalog.c
+++ b/src/backend/catalog/catalog.c
@@ -289,6 +289,8 @@ IsSharedRelation(Oid relationId)
relationId == PgShseclabelToastIndex ||
relationId == PgSubscriptionToastTable ||
relationId == PgSubscriptionToastIndex ||
+ relationId == PgDatabaseToastTable ||
+ relationId == PgDatabaseToastIndex ||
relationId == PgTablespaceToastTable ||
relationId == PgTablespaceToastIndex)
return true;
diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c
index 61853e6dec4..90bbe71c9db 100644
--- a/src/backend/commands/cluster.c
+++ b/src/backend/commands/cluster.c
@@ -564,6 +564,8 @@ mark_index_clustered(Relation rel, Oid indexOid, bool is_internal)
table_close(pg_index, RowExclusiveLock);
}
+bool inside_rebuild_relation = false;
+
/*
* rebuild_relation: rebuild an existing relation in index or physical order
*
@@ -585,37 +587,53 @@ rebuild_relation(Relation OldHeap, Oid indexOid, bool verbose)
TransactionId frozenXid;
MultiXactId cutoffMulti;
- /* Mark the correct index as clustered */
- if (OidIsValid(indexOid))
- mark_index_clustered(OldHeap, indexOid, true);
+ PG_TRY();
+ {
+ inside_rebuild_relation = true;
+
+ /* Mark the correct index as clustered */
+ if (OidIsValid(indexOid))
+ mark_index_clustered(OldHeap, indexOid, true);
- /* Remember info about rel before closing OldHeap */
- relpersistence = OldHeap->rd_rel->relpersistence;
- is_system_catalog = IsSystemRelation(OldHeap);
+ /* Remember info about rel before closing OldHeap */
+ relpersistence = OldHeap->rd_rel->relpersistence;
+ is_system_catalog = IsSystemRelation(OldHeap);
- /* Close relcache entry, but keep lock until transaction commit */
- table_close(OldHeap, NoLock);
+ /* Close relcache entry, but keep lock until transaction commit */
+ table_close(OldHeap, NoLock);
- /* Create the transient table that will receive the re-ordered data */
- OIDNewHeap = make_new_heap(tableOid, tableSpace,
- accessMethod,
- relpersistence,
- AccessExclusiveLock);
+ /* Create the transient table that will receive the re-ordered data */
+ OIDNewHeap = make_new_heap(tableOid, tableSpace,
+ accessMethod,
+ relpersistence,
+ AccessExclusiveLock);
- /* Copy the heap data into the new table in the desired order */
- copy_table_data(OIDNewHeap, tableOid, indexOid, verbose,
- &swap_toast_by_content, &frozenXid, &cutoffMulti);
+ /* Copy the heap data into the new table in the desired order */
+ copy_table_data(OIDNewHeap, tableOid, indexOid, verbose,
+ &swap_toast_by_content, &frozenXid, &cutoffMulti);
- /*
- * Swap the physical files of the target and transient tables, then
- * rebuild the target's indexes and throw away the transient table.
- */
- finish_heap_swap(tableOid, OIDNewHeap, is_system_catalog,
- swap_toast_by_content, false, true,
- frozenXid, cutoffMulti,
- relpersistence);
+ /*
+ * Swap the physical files of the target and transient tables, then
+ * rebuild the target's indexes and throw away the transient table.
+ */
+ finish_heap_swap(tableOid, OIDNewHeap, is_system_catalog,
+ swap_toast_by_content, false, true,
+ frozenXid, cutoffMulti,
+ relpersistence);
+ }
+ PG_CATCH();
+ {
+ inside_rebuild_relation = false;
+ PG_RE_THROW();
+ }
+ PG_END_TRY();
}
+bool
+is_inside_rebuild_relation()
+{
+ return inside_rebuild_relation;
+}
/*
* Create the transient table that will be filled with new data during
diff --git a/src/bin/pg_upgrade/check.c b/src/bin/pg_upgrade/check.c
index 3d218c2ad24..1a9f1978195 100644
--- a/src/bin/pg_upgrade/check.c
+++ b/src/bin/pg_upgrade/check.c
@@ -419,7 +419,8 @@ check_new_cluster_is_empty(void)
relnum++)
{
/* pg_largeobject and its index should be skipped */
- if (strcmp(rel_arr->rels[relnum].nspname, "pg_catalog") != 0)
+ if (strcmp(rel_arr->rels[relnum].nspname, "pg_catalog") != 0 &&
+ strcmp(rel_arr->rels[relnum].nspname, "pg_toast") != 0)
pg_fatal("New cluster database \"%s\" is not empty: found relation \"%s.%s\"\n",
new_cluster.dbarr.dbs[dbnum].db_name,
rel_arr->rels[relnum].nspname,
diff --git a/src/include/catalog/pg_attribute.h b/src/include/catalog/pg_attribute.h
index 053294c99f3..b4d7196d91a 100644
--- a/src/include/catalog/pg_attribute.h
+++ b/src/include/catalog/pg_attribute.h
@@ -206,6 +206,8 @@ CATALOG(pg_attribute,1249,AttributeRelationId) BKI_BOOTSTRAP BKI_ROWTYPE_OID(75,
*/
typedef FormData_pg_attribute *Form_pg_attribute;
+DECLARE_TOAST(pg_attribute, 9001, 9002);
+
DECLARE_UNIQUE_INDEX(pg_attribute_relid_attnam_index, 2658, AttributeRelidNameIndexId, on pg_attribute using btree(attrelid oid_ops, attname name_ops));
DECLARE_UNIQUE_INDEX_PKEY(pg_attribute_relid_attnum_index, 2659, AttributeRelidNumIndexId, on pg_attribute using btree(attrelid oid_ops, attnum int2_ops));
diff --git a/src/include/catalog/pg_class.h b/src/include/catalog/pg_class.h
index 304e8c18d52..b82c93aa681 100644
--- a/src/include/catalog/pg_class.h
+++ b/src/include/catalog/pg_class.h
@@ -152,6 +152,8 @@ CATALOG(pg_class,1259,RelationRelationId) BKI_BOOTSTRAP BKI_ROWTYPE_OID(83,Relat
*/
typedef FormData_pg_class *Form_pg_class;
+DECLARE_TOAST(pg_class, 9003, 9004);
+
DECLARE_UNIQUE_INDEX_PKEY(pg_class_oid_index, 2662, ClassOidIndexId, on pg_class using btree(oid oid_ops));
DECLARE_UNIQUE_INDEX(pg_class_relname_nsp_index, 2663, ClassNameNspIndexId, on pg_class using btree(relname name_ops, relnamespace oid_ops));
DECLARE_INDEX(pg_class_tblspc_relfilenode_index, 3455, ClassTblspcRelfilenodeIndexId, on pg_class using btree(reltablespace oid_ops, relfilenode oid_ops));
diff --git a/src/include/catalog/pg_largeobject_metadata.h b/src/include/catalog/pg_largeobject_metadata.h
index ec1c3bf7552..1db39377929 100644
--- a/src/include/catalog/pg_largeobject_metadata.h
+++ b/src/include/catalog/pg_largeobject_metadata.h
@@ -46,6 +46,8 @@ CATALOG(pg_largeobject_metadata,2995,LargeObjectMetadataRelationId)
*/
typedef FormData_pg_largeobject_metadata *Form_pg_largeobject_metadata;
+DECLARE_TOAST(pg_largeobject_metadata, 9017, 9018);
+
DECLARE_UNIQUE_INDEX_PKEY(pg_largeobject_metadata_oid_index, 2996, LargeObjectMetadataOidIndexId, on pg_largeobject_metadata using btree(oid oid_ops));
#endif /* PG_LARGEOBJECT_METADATA_H */
diff --git a/src/include/commands/cluster.h b/src/include/commands/cluster.h
index 3db375d7cc7..90951431f4d 100644
--- a/src/include/commands/cluster.h
+++ b/src/include/commands/cluster.h
@@ -45,5 +45,6 @@ extern void finish_heap_swap(Oid OIDOldHeap, Oid OIDNewHeap,
TransactionId frozenXid,
MultiXactId minMulti,
char newrelpersistence);
+extern bool is_inside_rebuild_relation(void);
#endif /* CLUSTER_H */
diff --git a/src/test/regress/expected/misc_sanity.out b/src/test/regress/expected/misc_sanity.out
index a57fd142a94..8053669eefe 100644
--- a/src/test/regress/expected/misc_sanity.out
+++ b/src/test/regress/expected/misc_sanity.out
@@ -35,11 +35,8 @@ WHERE refclassid = 0 OR refobjid = 0 OR
-- Look for system tables with varlena columns but no toast table. All
-- system tables with toastable columns should have toast tables, with
-- the following exceptions:
--- 1. pg_class, pg_attribute, and pg_index, due to fear of recursive
--- dependencies as toast tables depend on them.
--- 2. pg_largeobject and pg_largeobject_metadata. Large object catalogs
--- and toast tables are mutually exclusive and large object data is handled
--- as user data by pg_upgrade, which would cause failures.
+-- 1. pg_index.
+-- 2. pg_largeobject.
SELECT relname, attname, atttypid::regtype
FROM pg_class c JOIN pg_attribute a ON c.oid = attrelid
WHERE c.oid < 16384 AND
@@ -47,20 +44,12 @@ WHERE c.oid < 16384 AND
relkind = 'r' AND
attstorage != 'p'
ORDER BY 1, 2;
- relname | attname | atttypid
--------------------------+---------------+--------------
- pg_attribute | attacl | aclitem[]
- pg_attribute | attfdwoptions | text[]
- pg_attribute | attmissingval | anyarray
- pg_attribute | attoptions | text[]
- pg_class | relacl | aclitem[]
- pg_class | reloptions | text[]
- pg_class | relpartbound | pg_node_tree
- pg_index | indexprs | pg_node_tree
- pg_index | indpred | pg_node_tree
- pg_largeobject | data | bytea
- pg_largeobject_metadata | lomacl | aclitem[]
-(11 rows)
+ relname | attname | atttypid
+----------------+----------+--------------
+ pg_index | indexprs | pg_node_tree
+ pg_index | indpred | pg_node_tree
+ pg_largeobject | data | bytea
+(3 rows)
-- system catalogs without primary keys
--
diff --git a/src/test/regress/expected/pg_catalog_toast1.out b/src/test/regress/expected/pg_catalog_toast1.out
new file mode 100644
index 00000000000..1918351fede
--- /dev/null
+++ b/src/test/regress/expected/pg_catalog_toast1.out
@@ -0,0 +1,25 @@
+CREATE OR REPLACE FUNCTION toast_acl()
+RETURNS BOOLEAN AS $$
+DECLARE
+ I INTEGER :=0;
+BEGIN
+ SET LOCAL CLIENT_MIN_MESSAGES=WARNING;
+ EXECUTE 'DROP TABLE IF EXISTS test2510';
+ EXECUTE 'CREATE TABLE test2510 (a INT)';
+ FOR I IN 1..4096 LOOP
+ EXECUTE 'DROP ROLE IF EXISTS role' || I;
+ EXECUTE 'CREATE ROLE role' || I;
+ EXECUTE 'GRANT ALL ON test2510 TO role' || I;
+ END LOOP;
+ RESET CLIENT_MIN_MESSAGES;
+ RETURN TRUE;
+END;
+$$
+LANGUAGE PLPGSQL;
+SELECT toast_acl();
+ toast_acl
+-----------
+ t
+(1 row)
+
+CREATE INDEX ON test2510 USING BTREE(a);
diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule
index 5b0c73d7e37..270da2ecfa9 100644
--- a/src/test/regress/parallel_schedule
+++ b/src/test/regress/parallel_schedule
@@ -134,3 +134,6 @@ test: fast_default
# run stats by itself because its delay may be insufficient under heavy load
test: stats
+
+#check for "invalig tupple length" error when creating index with toasted pg_class
+test: pg_catalog_toast1
diff --git a/src/test/regress/sql/misc_sanity.sql b/src/test/regress/sql/misc_sanity.sql
index 2c0f87a651f..f1feb7a77e6 100644
--- a/src/test/regress/sql/misc_sanity.sql
+++ b/src/test/regress/sql/misc_sanity.sql
@@ -38,11 +38,8 @@ WHERE refclassid = 0 OR refobjid = 0 OR
-- Look for system tables with varlena columns but no toast table. All
-- system tables with toastable columns should have toast tables, with
-- the following exceptions:
--- 1. pg_class, pg_attribute, and pg_index, due to fear of recursive
--- dependencies as toast tables depend on them.
--- 2. pg_largeobject and pg_largeobject_metadata. Large object catalogs
--- and toast tables are mutually exclusive and large object data is handled
--- as user data by pg_upgrade, which would cause failures.
+-- 1. pg_index.
+-- 2. pg_largeobject.
SELECT relname, attname, atttypid::regtype
FROM pg_class c JOIN pg_attribute a ON c.oid = attrelid
diff --git a/src/test/regress/sql/pg_catalog_toast1.sql b/src/test/regress/sql/pg_catalog_toast1.sql
new file mode 100644
index 00000000000..d33122214fe
--- /dev/null
+++ b/src/test/regress/sql/pg_catalog_toast1.sql
@@ -0,0 +1,20 @@
+CREATE OR REPLACE FUNCTION toast_acl()
+RETURNS BOOLEAN AS $$
+DECLARE
+ I INTEGER :=0;
+BEGIN
+ SET LOCAL CLIENT_MIN_MESSAGES=WARNING;
+ EXECUTE 'DROP TABLE IF EXISTS test2510';
+ EXECUTE 'CREATE TABLE test2510 (a INT)';
+ FOR I IN 1..4096 LOOP
+ EXECUTE 'DROP ROLE IF EXISTS role' || I;
+ EXECUTE 'CREATE ROLE role' || I;
+ EXECUTE 'GRANT ALL ON test2510 TO role' || I;
+ END LOOP;
+ RESET CLIENT_MIN_MESSAGES;
+ RETURN TRUE;
+END;
+$$
+LANGUAGE PLPGSQL;
+SELECT toast_acl();
+CREATE INDEX ON test2510 USING BTREE(a);
--
2.20.1