Experimenting with hash tables inside pg_dump

Tom Lane Thu, 21 Oct 2021 15:27:41 -0700

Today, pg_dump does a lot of internal lookups via binary search
in presorted arrays.  I thought it might improve matters
to replace those binary searches with hash tables, theoretically
converting O(log N) searches into O(1) searches.  So I tried making
a hash table indexed by CatalogId (tableoid+oid) with simplehash.h,
and replacing as many data structures as I could with that.


This makes the code shorter and (IMO anyway) cleaner, but

(a) the executable size increases by a few KB --- apparently, even
the minimum subset of simplehash.h's functionality is code-wasteful.

(b) I couldn't measure any change in performance at all.  I tried
it on the regression database and on a toy DB with 10000 simple
tables.  Maybe on a really large DB you'd notice some difference,
but I'm not very optimistic now.

So this experiment feels like a failure, but I thought I'd post
the patch and results for the archives' sake.  Maybe somebody
will think of a way to improve matters.  Or maybe it's worth
doing just to shorten the code?

                        regards, tom lane

diff --git a/src/bin/pg_dump/common.c b/src/bin/pg_dump/common.c
index 1f24e79665..49932fd598 100644
--- a/src/bin/pg_dump/common.c
+++ b/src/bin/pg_dump/common.c
@@ -18,6 +18,14 @@
 #include <ctype.h>
 
 #include "catalog/pg_class_d.h"
+#include "catalog/pg_collation_d.h"
+#include "catalog/pg_extension_d.h"
+#include "catalog/pg_namespace_d.h"
+#include "catalog/pg_operator_d.h"
+#include "catalog/pg_proc_d.h"
+#include "catalog/pg_publication_d.h"
+#include "catalog/pg_type_d.h"
+#include "common/hashfn.h"
 #include "fe_utils/string_utils.h"
 #include "pg_backup_archiver.h"
 #include "pg_backup_utils.h"
@@ -31,54 +39,54 @@ static int	allocedDumpIds = 0;
 static DumpId lastDumpId = 0;	/* Note: 0 is InvalidDumpId */
 
 /*
- * Variables for mapping CatalogId to DumpableObject
- */
-static bool catalogIdMapValid = false;
-static DumpableObject **catalogIdMap = NULL;
-static int	numCatalogIds = 0;
-
-/*
- * These variables are static to avoid the notational cruft of having to pass
- * them into findTableByOid() and friends.  For each of these arrays, we build
- * a sorted-by-OID index array immediately after the objects are fetched,
- * and then we use binary search in findTableByOid() and friends.  (qsort'ing
- * the object arrays themselves would be simpler, but it doesn't work because
- * pg_dump.c may have already established pointers between items.)
+ * Infrastructure for mapping CatalogId to DumpableObject
+ *
+ * We use a hash table generated by simplehash.h.  That infrastructure
+ * requires all the hash table entries to be the same size, and it also
+ * expects that it can move them around when resizing the table.  So we
+ * cannot make the DumpableObjects be elements of the hash table directly;
+ * instead, the hash table elements contain pointers to DumpableObjects.
+ *
+ * It turns out to be convenient to also use this data structure to map
+ * CatalogIds to owning extensions, if any.  Since extension membership
+ * data is read before creating most DumpableObjects, either one of dobj
+ * and ext could be NULL.
  */
-static DumpableObject **tblinfoindex;
-static DumpableObject **typinfoindex;
-static DumpableObject **funinfoindex;
-static DumpableObject **oprinfoindex;
-static DumpableObject **collinfoindex;
-static DumpableObject **nspinfoindex;
-static DumpableObject **extinfoindex;
-static DumpableObject **pubinfoindex;
-static int	numTables;
-static int	numTypes;
-static int	numFuncs;
-static int	numOperators;
-static int	numCollations;
-static int	numNamespaces;
-static int	numExtensions;
-static int	numPublications;
-
-/* This is an array of object identities, not actual DumpableObjects */
-static ExtensionMemberId *extmembers;
-static int	numextmembers;
+typedef struct _catalogIdMapEntry
+{
+	CatalogId	catId;			/* the indexed CatalogId */
+	uint32		status;			/* hash status */
+	uint32		hashval;		/* hash code for the CatalogId */
+	DumpableObject *dobj;		/* the associated DumpableObject, if any */
+	ExtensionInfo *ext;			/* owning extension, if any */
+} CatalogIdMapEntry;
+
+#define SH_PREFIX		catalogid
+#define SH_ELEMENT_TYPE	CatalogIdMapEntry
+#define SH_KEY_TYPE		CatalogId
+#define	SH_KEY			catId
+#define SH_HASH_KEY(tb, key)	hash_bytes((const unsigned char *) &(key), sizeof(CatalogId))
+#define SH_EQUAL(tb, a, b)		((a).oid == (b).oid && (a).tableoid == (b).tableoid)
+#define SH_STORE_HASH
+#define SH_GET_HASH(tb, a) (a)->hashval
+#define	SH_SCOPE		static inline
+#define SH_RAW_ALLOCATOR	pg_malloc0
+#define SH_DECLARE
+#define SH_DEFINE
+#include "lib/simplehash.h"
+
+#define CATALOGIDHASH_INITIAL_SIZE	10000
+
+static catalogid_hash *catalogIdHash = NULL;
 
 static void flagInhTables(Archive *fout, TableInfo *tbinfo, int numTables,
 						  InhInfo *inhinfo, int numInherits);
 static void flagInhIndexes(Archive *fout, TableInfo *tblinfo, int numTables);
 static void flagInhAttrs(DumpOptions *dopt, TableInfo *tblinfo, int numTables);
-static DumpableObject **buildIndexArray(void *objArray, int numObjs,
-										Size objSize);
-static int	DOCatalogIdCompare(const void *p1, const void *p2);
-static int	ExtensionMemberIdCompare(const void *p1, const void *p2);
 static void findParentsByOid(TableInfo *self,
 							 InhInfo *inhinfo, int numInherits);
 static int	strInArray(const char *pattern, char **arr, int arr_size);
-static IndxInfo *findIndexByOid(Oid oid, DumpableObject **idxinfoindex,
-								int numIndexes);
+static IndxInfo *findIndexByOid(Oid oid);
 
 
 /*
@@ -89,14 +97,16 @@ TableInfo *
 getSchemaData(Archive *fout, int *numTablesPtr)
 {
 	TableInfo  *tblinfo;
-	TypeInfo   *typinfo;
-	FuncInfo   *funinfo;
-	OprInfo    *oprinfo;
-	CollInfo   *collinfo;
-	NamespaceInfo *nspinfo;
 	ExtensionInfo *extinfo;
-	PublicationInfo *pubinfo;
 	InhInfo    *inhinfo;
+	int			numTables;
+	int			numTypes;
+	int			numFuncs;
+	int			numOperators;
+	int			numCollations;
+	int			numNamespaces;
+	int			numExtensions;
+	int			numPublications;
 	int			numAggregates;
 	int			numInherits;
 	int			numRules;
@@ -123,14 +133,12 @@ getSchemaData(Archive *fout, int *numTablesPtr)
 	 */
 	pg_log_info("reading extensions");
 	extinfo = getExtensions(fout, &numExtensions);
-	extinfoindex = buildIndexArray(extinfo, numExtensions, sizeof(ExtensionInfo));
 
 	pg_log_info("identifying extension members");
 	getExtensionMembership(fout, extinfo, numExtensions);
 
 	pg_log_info("reading schemas");
-	nspinfo = getNamespaces(fout, &numNamespaces);
-	nspinfoindex = buildIndexArray(nspinfo, numNamespaces, sizeof(NamespaceInfo));
+	(void) getNamespaces(fout, &numNamespaces);
 
 	/*
 	 * getTables should be done as soon as possible, so as to minimize the
@@ -140,19 +148,15 @@ getSchemaData(Archive *fout, int *numTablesPtr)
 	 */
 	pg_log_info("reading user-defined tables");
 	tblinfo = getTables(fout, &numTables);
-	tblinfoindex = buildIndexArray(tblinfo, numTables, sizeof(TableInfo));
 
-	/* Do this after we've built tblinfoindex */
 	getOwnedSeqs(fout, tblinfo, numTables);
 
 	pg_log_info("reading user-defined functions");
-	funinfo = getFuncs(fout, &numFuncs);
-	funinfoindex = buildIndexArray(funinfo, numFuncs, sizeof(FuncInfo));
+	(void) getFuncs(fout, &numFuncs);
 
 	/* this must be after getTables and getFuncs */
 	pg_log_info("reading user-defined types");
-	typinfo = getTypes(fout, &numTypes);
-	typinfoindex = buildIndexArray(typinfo, numTypes, sizeof(TypeInfo));
+	(void) getTypes(fout, &numTypes);
 
 	/* this must be after getFuncs, too */
 	pg_log_info("reading procedural languages");
@@ -162,8 +166,7 @@ getSchemaData(Archive *fout, int *numTablesPtr)
 	getAggregates(fout, &numAggregates);
 
 	pg_log_info("reading user-defined operators");
-	oprinfo = getOperators(fout, &numOperators);
-	oprinfoindex = buildIndexArray(oprinfo, numOperators, sizeof(OprInfo));
+	(void) getOperators(fout, &numOperators);
 
 	pg_log_info("reading user-defined access methods");
 	getAccessMethods(fout, &numAccessMethods);
@@ -196,8 +199,7 @@ getSchemaData(Archive *fout, int *numTablesPtr)
 	getDefaultACLs(fout, &numDefaultACLs);
 
 	pg_log_info("reading user-defined collations");
-	collinfo = getCollations(fout, &numCollations);
-	collinfoindex = buildIndexArray(collinfo, numCollations, sizeof(CollInfo));
+	(void) getCollations(fout, &numCollations);
 
 	pg_log_info("reading user-defined conversions");
 	getConversions(fout, &numConversions);
@@ -250,9 +252,7 @@ getSchemaData(Archive *fout, int *numTablesPtr)
 	getPolicies(fout, tblinfo, numTables);
 
 	pg_log_info("reading publications");
-	pubinfo = getPublications(fout, &numPublications);
-	pubinfoindex = buildIndexArray(pubinfo, numPublications,
-								   sizeof(PublicationInfo));
+	(void) getPublications(fout, &numPublications);
 
 	pg_log_info("reading publication membership");
 	getPublicationTables(fout, tblinfo, numTables);
@@ -375,34 +375,15 @@ flagInhIndexes(Archive *fout, TableInfo tblinfo[], int numTables)
 	int			i,
 				j,
 				k;
-	DumpableObject ***parentIndexArray;
-
-	parentIndexArray = (DumpableObject ***)
-		pg_malloc0(getMaxDumpId() * sizeof(DumpableObject **));
 
 	for (i = 0; i < numTables; i++)
 	{
-		TableInfo  *parenttbl;
 		IndexAttachInfo *attachinfo;
 
 		if (!tblinfo[i].ispartition || tblinfo[i].numParents == 0)
 			continue;
 
 		Assert(tblinfo[i].numParents == 1);
-		parenttbl = tblinfo[i].parents[0];
-
-		/*
-		 * We need access to each parent table's index list, but there is no
-		 * index to cover them outside of this function.  To avoid having to
-		 * sort every parent table's indexes each time we come across each of
-		 * its partitions, create an indexed array for each parent the first
-		 * time it is required.
-		 */
-		if (parentIndexArray[parenttbl->dobj.dumpId] == NULL)
-			parentIndexArray[parenttbl->dobj.dumpId] =
-				buildIndexArray(parenttbl->indexes,
-								parenttbl->numIndexes,
-								sizeof(IndxInfo));
 
 		attachinfo = (IndexAttachInfo *)
 			pg_malloc0(tblinfo[i].numIndexes * sizeof(IndexAttachInfo));
@@ -414,9 +395,7 @@ flagInhIndexes(Archive *fout, TableInfo tblinfo[], int numTables)
 			if (index->parentidx == 0)
 				continue;
 
-			parentidx = findIndexByOid(index->parentidx,
-									   parentIndexArray[parenttbl->dobj.dumpId],
-									   parenttbl->numIndexes);
+			parentidx = findIndexByOid(index->parentidx);
 			if (parentidx == NULL)
 				continue;
 
@@ -457,11 +436,6 @@ flagInhIndexes(Archive *fout, TableInfo tblinfo[], int numTables)
 			k++;
 		}
 	}
-
-	for (i = 0; i < numTables; i++)
-		if (parentIndexArray[i])
-			pg_free(parentIndexArray[i]);
-	pg_free(parentIndexArray);
 }
 
 /* flagInhAttrs -
@@ -596,7 +570,7 @@ flagInhAttrs(DumpOptions *dopt, TableInfo *tblinfo, int numTables)
 /*
  * AssignDumpId
  *		Given a newly-created dumpable object, assign a dump ID,
- *		and enter the object into the lookup table.
+ *		and enter the object into the lookup tables.
  *
  * The caller is expected to have filled in objType and catId,
  * but not any of the other standard fields of a DumpableObject.
@@ -615,6 +589,7 @@ AssignDumpId(DumpableObject *dobj)
 	dobj->nDeps = 0;
 	dobj->allocDeps = 0;
 
+	/* Add object to dumpIdMap[], enlarging that array if need be */
 	while (dobj->dumpId >= allocedDumpIds)
 	{
 		int			newAlloc;
@@ -637,8 +612,25 @@ AssignDumpId(DumpableObject *dobj)
 	}
 	dumpIdMap[dobj->dumpId] = dobj;
 
-	/* mark catalogIdMap invalid, but don't rebuild it yet */
-	catalogIdMapValid = false;
+	/* If it has a valid CatalogId, enter it into the hash table */
+	if (OidIsValid(dobj->catId.tableoid))
+	{
+		CatalogIdMapEntry *entry;
+		bool		found;
+
+		/* Initialize CatalogId hash table if not done yet */
+		if (catalogIdHash == NULL)
+			catalogIdHash = catalogid_create(CATALOGIDHASH_INITIAL_SIZE, NULL);
+
+		entry = catalogid_insert(catalogIdHash, dobj->catId, &found);
+		if (!found)
+		{
+			entry->dobj = NULL;
+			entry->ext = NULL;
+		}
+		Assert(entry->dobj == NULL);
+		entry->dobj = dobj;
+	}
 }
 
 /*
@@ -679,140 +671,19 @@ findObjectByDumpId(DumpId dumpId)
  * Find a DumpableObject by catalog ID
  *
  * Returns NULL for unknown ID
- *
- * We use binary search in a sorted list that is built on first call.
- * If AssignDumpId() and findObjectByCatalogId() calls were freely intermixed,
- * the code would work, but possibly be very slow.  In the current usage
- * pattern that does not happen, indeed we build the list at most twice.
  */
 DumpableObject *
 findObjectByCatalogId(CatalogId catalogId)
 {
-	DumpableObject **low;
-	DumpableObject **high;
-
-	if (!catalogIdMapValid)
-	{
-		if (catalogIdMap)
-			free(catalogIdMap);
-		getDumpableObjects(&catalogIdMap, &numCatalogIds);
-		if (numCatalogIds > 1)
-			qsort((void *) catalogIdMap, numCatalogIds,
-				  sizeof(DumpableObject *), DOCatalogIdCompare);
-		catalogIdMapValid = true;
-	}
-
-	/*
-	 * We could use bsearch() here, but the notational cruft of calling
-	 * bsearch is nearly as bad as doing it ourselves; and the generalized
-	 * bsearch function is noticeably slower as well.
-	 */
-	if (numCatalogIds <= 0)
-		return NULL;
-	low = catalogIdMap;
-	high = catalogIdMap + (numCatalogIds - 1);
-	while (low <= high)
-	{
-		DumpableObject **middle;
-		int			difference;
-
-		middle = low + (high - low) / 2;
-		/* comparison must match DOCatalogIdCompare, below */
-		difference = oidcmp((*middle)->catId.oid, catalogId.oid);
-		if (difference == 0)
-			difference = oidcmp((*middle)->catId.tableoid, catalogId.tableoid);
-		if (difference == 0)
-			return *middle;
-		else if (difference < 0)
-			low = middle + 1;
-		else
-			high = middle - 1;
-	}
-	return NULL;
-}
-
-/*
- * Find a DumpableObject by OID, in a pre-sorted array of one type of object
- *
- * Returns NULL for unknown OID
- */
-static DumpableObject *
-findObjectByOid(Oid oid, DumpableObject **indexArray, int numObjs)
-{
-	DumpableObject **low;
-	DumpableObject **high;
+	CatalogIdMapEntry *entry;
 
-	/*
-	 * This is the same as findObjectByCatalogId except we assume we need not
-	 * look at table OID because the objects are all the same type.
-	 *
-	 * We could use bsearch() here, but the notational cruft of calling
-	 * bsearch is nearly as bad as doing it ourselves; and the generalized
-	 * bsearch function is noticeably slower as well.
-	 */
-	if (numObjs <= 0)
-		return NULL;
-	low = indexArray;
-	high = indexArray + (numObjs - 1);
-	while (low <= high)
-	{
-		DumpableObject **middle;
-		int			difference;
-
-		middle = low + (high - low) / 2;
-		difference = oidcmp((*middle)->catId.oid, oid);
-		if (difference == 0)
-			return *middle;
-		else if (difference < 0)
-			low = middle + 1;
-		else
-			high = middle - 1;
-	}
-	return NULL;
-}
-
-/*
- * Build an index array of DumpableObject pointers, sorted by OID
- */
-static DumpableObject **
-buildIndexArray(void *objArray, int numObjs, Size objSize)
-{
-	DumpableObject **ptrs;
-	int			i;
+	if (catalogIdHash == NULL)
+		return NULL;			/* no objects exist yet */
 
-	if (numObjs <= 0)
+	entry = catalogid_lookup(catalogIdHash, catalogId);
+	if (entry == NULL)
 		return NULL;
-
-	ptrs = (DumpableObject **) pg_malloc(numObjs * sizeof(DumpableObject *));
-	for (i = 0; i < numObjs; i++)
-		ptrs[i] = (DumpableObject *) ((char *) objArray + i * objSize);
-
-	/* We can use DOCatalogIdCompare to sort since its first key is OID */
-	if (numObjs > 1)
-		qsort((void *) ptrs, numObjs, sizeof(DumpableObject *),
-			  DOCatalogIdCompare);
-
-	return ptrs;
-}
-
-/*
- * qsort comparator for pointers to DumpableObjects
- */
-static int
-DOCatalogIdCompare(const void *p1, const void *p2)
-{
-	const DumpableObject *obj1 = *(DumpableObject *const *) p1;
-	const DumpableObject *obj2 = *(DumpableObject *const *) p2;
-	int			cmpval;
-
-	/*
-	 * Compare OID first since it's usually unique, whereas there will only be
-	 * a few distinct values of tableoid.
-	 */
-	cmpval = oidcmp(obj1->catId.oid, obj2->catId.oid);
-	if (cmpval == 0)
-		cmpval = oidcmp(obj1->catId.tableoid, obj2->catId.tableoid);
-	return cmpval;
+	return entry->dobj;
 }
 
 /*
@@ -886,119 +757,169 @@ removeObjectDependency(DumpableObject *dobj, DumpId refId)
 
 /*
  * findTableByOid
- *	  finds the entry (in tblinfo) of the table with the given oid
+ *	  finds the DumpableObject for the table with the given oid
  *	  returns NULL if not found
  */
 TableInfo *
 findTableByOid(Oid oid)
 {
-	return (TableInfo *) findObjectByOid(oid, tblinfoindex, numTables);
+	CatalogId	catId;
+	DumpableObject *dobj;
+
+	catId.tableoid = RelationRelationId;
+	catId.oid = oid;
+	dobj = findObjectByCatalogId(catId);
+	Assert(dobj == NULL || dobj->objType == DO_TABLE);
+	return (TableInfo *) dobj;
+}
+
+/*
+ * findIndexByOid
+ *	  finds the DumpableObject for the index with the given oid
+ *	  returns NULL if not found
+ */
+static IndxInfo *
+findIndexByOid(Oid oid)
+{
+	CatalogId	catId;
+	DumpableObject *dobj;
+
+	catId.tableoid = RelationRelationId;
+	catId.oid = oid;
+	dobj = findObjectByCatalogId(catId);
+	Assert(dobj == NULL || dobj->objType == DO_INDEX);
+	return (IndxInfo *) dobj;
 }
 
 /*
  * findTypeByOid
- *	  finds the entry (in typinfo) of the type with the given oid
+ *	  finds the DumpableObject for the type with the given oid
  *	  returns NULL if not found
  */
 TypeInfo *
 findTypeByOid(Oid oid)
 {
-	return (TypeInfo *) findObjectByOid(oid, typinfoindex, numTypes);
+	CatalogId	catId;
+
+	catId.tableoid = TypeRelationId;
+	catId.oid = oid;
+	return (TypeInfo *) findObjectByCatalogId(catId);
 }
 
 /*
  * findFuncByOid
- *	  finds the entry (in funinfo) of the function with the given oid
+ *	  finds the DumpableObject for the function with the given oid
  *	  returns NULL if not found
  */
 FuncInfo *
 findFuncByOid(Oid oid)
 {
-	return (FuncInfo *) findObjectByOid(oid, funinfoindex, numFuncs);
+	CatalogId	catId;
+
+	catId.tableoid = ProcedureRelationId;
+	catId.oid = oid;
+	return (FuncInfo *) findObjectByCatalogId(catId);
 }
 
 /*
  * findOprByOid
- *	  finds the entry (in oprinfo) of the operator with the given oid
+ *	  finds the DumpableObject for the operator with the given oid
  *	  returns NULL if not found
  */
 OprInfo *
 findOprByOid(Oid oid)
 {
-	return (OprInfo *) findObjectByOid(oid, oprinfoindex, numOperators);
+	CatalogId	catId;
+
+	catId.tableoid = OperatorRelationId;
+	catId.oid = oid;
+	return (OprInfo *) findObjectByCatalogId(catId);
 }
 
 /*
  * findCollationByOid
- *	  finds the entry (in collinfo) of the collation with the given oid
+ *	  finds the DumpableObject for the collation with the given oid
  *	  returns NULL if not found
  */
 CollInfo *
 findCollationByOid(Oid oid)
 {
-	return (CollInfo *) findObjectByOid(oid, collinfoindex, numCollations);
+	CatalogId	catId;
+
+	catId.tableoid = CollationRelationId;
+	catId.oid = oid;
+	return (CollInfo *) findObjectByCatalogId(catId);
 }
 
 /*
  * findNamespaceByOid
- *	  finds the entry (in nspinfo) of the namespace with the given oid
+ *	  finds the DumpableObject for the namespace with the given oid
  *	  returns NULL if not found
  */
 NamespaceInfo *
 findNamespaceByOid(Oid oid)
 {
-	return (NamespaceInfo *) findObjectByOid(oid, nspinfoindex, numNamespaces);
+	CatalogId	catId;
+
+	catId.tableoid = NamespaceRelationId;
+	catId.oid = oid;
+	return (NamespaceInfo *) findObjectByCatalogId(catId);
 }
 
 /*
  * findExtensionByOid
- *	  finds the entry (in extinfo) of the extension with the given oid
+ *	  finds the DumpableObject for the extension with the given oid
  *	  returns NULL if not found
  */
 ExtensionInfo *
 findExtensionByOid(Oid oid)
 {
-	return (ExtensionInfo *) findObjectByOid(oid, extinfoindex, numExtensions);
+	CatalogId	catId;
+
+	catId.tableoid = ExtensionRelationId;
+	catId.oid = oid;
+	return (ExtensionInfo *) findObjectByCatalogId(catId);
 }
 
 /*
  * findPublicationByOid
- *	  finds the entry (in pubinfo) of the publication with the given oid
+ *	  finds the DumpableObject for the publication with the given oid
  *	  returns NULL if not found
  */
 PublicationInfo *
 findPublicationByOid(Oid oid)
 {
-	return (PublicationInfo *) findObjectByOid(oid, pubinfoindex, numPublications);
-}
+	CatalogId	catId;
 
-/*
- * findIndexByOid
- *		find the entry of the index with the given oid
- *
- * This one's signature is different from the previous ones because we lack a
- * global array of all indexes, so caller must pass their array as argument.
- */
-static IndxInfo *
-findIndexByOid(Oid oid, DumpableObject **idxinfoindex, int numIndexes)
-{
-	return (IndxInfo *) findObjectByOid(oid, idxinfoindex, numIndexes);
+	catId.tableoid = PublicationRelationId;
+	catId.oid = oid;
+	return (PublicationInfo *) findObjectByCatalogId(catId);
 }
 
+
 /*
- * setExtensionMembership
- *	  accept and save data about which objects belong to extensions
+ * recordExtensionMembership
+ *	  Record that the object identified by the given catalog ID
+ *	  belongs to the given extension
  */
 void
-setExtensionMembership(ExtensionMemberId *extmems, int nextmems)
+recordExtensionMembership(CatalogId catId, ExtensionInfo *ext)
 {
-	/* Sort array in preparation for binary searches */
-	if (nextmems > 1)
-		qsort((void *) extmems, nextmems, sizeof(ExtensionMemberId),
-			  ExtensionMemberIdCompare);
-	/* And save */
-	extmembers = extmems;
-	numextmembers = nextmems;
+	CatalogIdMapEntry *entry;
+	bool		found;
+
+	/* CatalogId hash table must exist, if we have an ExtensionInfo */
+	Assert(catalogIdHash != NULL);
+
+	/* Add reference to CatalogId hash */
+	entry = catalogid_insert(catalogIdHash, catId, &found);
+	if (!found)
+	{
+		entry->dobj = NULL;
+		entry->ext = NULL;
+	}
+	Assert(entry->ext == NULL);
+	entry->ext = ext;
 }
 
 /*
@@ -1008,56 +929,15 @@ setExtensionMembership(ExtensionMemberId *extmems, int nextmems)
 ExtensionInfo *
 findOwningExtension(CatalogId catalogId)
 {
-	ExtensionMemberId *low;
-	ExtensionMemberId *high;
+	CatalogIdMapEntry *entry;
 
-	/*
-	 * We could use bsearch() here, but the notational cruft of calling
-	 * bsearch is nearly as bad as doing it ourselves; and the generalized
-	 * bsearch function is noticeably slower as well.
-	 */
-	if (numextmembers <= 0)
-		return NULL;
-	low = extmembers;
-	high = extmembers + (numextmembers - 1);
-	while (low <= high)
-	{
-		ExtensionMemberId *middle;
-		int			difference;
-
-		middle = low + (high - low) / 2;
-		/* comparison must match ExtensionMemberIdCompare, below */
-		difference = oidcmp(middle->catId.oid, catalogId.oid);
-		if (difference == 0)
-			difference = oidcmp(middle->catId.tableoid, catalogId.tableoid);
-		if (difference == 0)
-			return middle->ext;
-		else if (difference < 0)
-			low = middle + 1;
-		else
-			high = middle - 1;
-	}
-	return NULL;
-}
-
-/*
- * qsort comparator for ExtensionMemberIds
- */
-static int
-ExtensionMemberIdCompare(const void *p1, const void *p2)
-{
-	const ExtensionMemberId *obj1 = (const ExtensionMemberId *) p1;
-	const ExtensionMemberId *obj2 = (const ExtensionMemberId *) p2;
-	int			cmpval;
+	if (catalogIdHash == NULL)
+		return NULL;			/* no objects exist yet */
 
-	/*
-	 * Compare OID first since it's usually unique, whereas there will only be
-	 * a few distinct values of tableoid.
-	 */
-	cmpval = oidcmp(obj1->catId.oid, obj2->catId.oid);
-	if (cmpval == 0)
-		cmpval = oidcmp(obj1->catId.tableoid, obj2->catId.tableoid);
-	return cmpval;
+	entry = catalogid_lookup(catalogIdHash, catalogId);
+	if (entry == NULL)
+		return NULL;
+	return entry->ext;
 }
 
 
diff --git a/src/bin/pg_dump/pg_backup.h b/src/bin/pg_dump/pg_backup.h
index 3c1cd858a8..6af10a85a2 100644
--- a/src/bin/pg_dump/pg_backup.h
+++ b/src/bin/pg_dump/pg_backup.h
@@ -236,6 +236,7 @@ typedef struct Archive
 
 typedef struct
 {
+	/* Note: this struct must not contain any unused bytes */
 	Oid			tableoid;
 	Oid			oid;
 } CatalogId;
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index ed8ed2f266..948615a5b9 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -17901,12 +17901,10 @@ getExtensionMembership(Archive *fout, ExtensionInfo extinfo[],
 	PQExpBuffer query;
 	PGresult   *res;
 	int			ntups,
-				nextmembers,
 				i;
 	int			i_classid,
 				i_objid,
 				i_refobjid;
-	ExtensionMemberId *extmembers;
 	ExtensionInfo *ext;
 
 	/* Nothing to do if no extensions */
@@ -17931,12 +17929,7 @@ getExtensionMembership(Archive *fout, ExtensionInfo extinfo[],
 	i_objid = PQfnumber(res, "objid");
 	i_refobjid = PQfnumber(res, "refobjid");
 
-	extmembers = (ExtensionMemberId *) pg_malloc(ntups * sizeof(ExtensionMemberId));
-	nextmembers = 0;
-
 	/*
-	 * Accumulate data into extmembers[].
-	 *
 	 * Since we ordered the SELECT by referenced ID, we can expect that
 	 * multiple entries for the same extension will appear together; this
 	 * saves on searches.
@@ -17963,16 +17956,11 @@ getExtensionMembership(Archive *fout, ExtensionInfo extinfo[],
 			continue;
 		}
 
-		extmembers[nextmembers].catId = objId;
-		extmembers[nextmembers].ext = ext;
-		nextmembers++;
+		recordExtensionMembership(objId, ext);
 	}
 
 	PQclear(res);
 
-	/* Remember the data for use later */
-	setExtensionMembership(extmembers, nextmembers);
-
 	destroyPQExpBuffer(query);
 }
 
diff --git a/src/bin/pg_dump/pg_dump.h b/src/bin/pg_dump/pg_dump.h
index 29af845ece..cc55e598ec 100644
--- a/src/bin/pg_dump/pg_dump.h
+++ b/src/bin/pg_dump/pg_dump.h
@@ -647,16 +647,6 @@ typedef struct _SubscriptionInfo
 	char	   *subpublications;
 } SubscriptionInfo;
 
-/*
- * We build an array of these with an entry for each object that is an
- * extension member according to pg_depend.
- */
-typedef struct _extensionMemberId
-{
-	CatalogId	catId;			/* tableoid+oid of some member object */
-	ExtensionInfo *ext;			/* owning extension */
-} ExtensionMemberId;
-
 /*
  *	common utility functions
  */
@@ -682,7 +672,7 @@ extern NamespaceInfo *findNamespaceByOid(Oid oid);
 extern ExtensionInfo *findExtensionByOid(Oid oid);
 extern PublicationInfo *findPublicationByOid(Oid oid);
 
-extern void setExtensionMembership(ExtensionMemberId *extmems, int nextmems);
+extern void recordExtensionMembership(CatalogId catId, ExtensionInfo *ext);
 extern ExtensionInfo *findOwningExtension(CatalogId catalogId);
 
 extern void parseOidArray(const char *str, Oid *array, int arraysize);

Experimenting with hash tables inside pg_dump

Reply via email to