[PATCH] Sort policies and triggers by table name in pg_dump.

2019-09-23 Thread Benjie Gillam
Hello all,

Currently pg_dump sorts most dumpable objects by priority, namespace, name
and then object ID. Since triggers and RLS policies belong to tables, there
may be more than one with the same name within the same namespace, leading to
potential sorting discrepancies between databases that only differ by object
IDs.

The attached draft patch (made against `pg_dump_sort.c` on master) breaks
ties for trigger and policy objects by using the table name, increasing the
sort order stability. I have compiled it and executed it against a number of
local databases and it behaves as desired.

I am new to PostgreSQL contribution and my C-skills are rusty, so please let
me know if I can improve the patch, or if there are areas of PostgreSQL that
I have overlooked.

Kind regards,

Benjie Gillam
From 55b3845b746498a1544bc7ced16a369f4da3905f Mon Sep 17 00:00:00 2001
From: Benjie Gillam 
Date: Mon, 23 Sep 2019 21:18:24 +0100
Subject: [PATCH] Sort policies and triggers by table name in pg_dump.
To: pgsql-hack...@postgresql.org

---
 src/bin/pg_dump/pg_dump_sort.c | 22 ++
 1 file changed, 22 insertions(+)

diff --git a/src/bin/pg_dump/pg_dump_sort.c b/src/bin/pg_dump/pg_dump_sort.c
index 31fc06a255..5ff7359dff 100644
--- a/src/bin/pg_dump/pg_dump_sort.c
+++ b/src/bin/pg_dump/pg_dump_sort.c
@@ -207,6 +207,28 @@ DOTypeNameCompare(const void *p1, const void *p2)
 		if (cmpval != 0)
 			return cmpval;
 	}
+	else if (obj1->objType == DO_POLICY)
+	{
+		PolicyInfo *pobj1 = *(PolicyInfo *const *) p1;
+		PolicyInfo *pobj2 = *(PolicyInfo *const *) p2;
+
+		/* Sort by table name */
+		cmpval = strcmp(pobj1->poltable->dobj.name,
+		pobj2->poltable->dobj.name);
+		if (cmpval != 0)
+			return cmpval;
+	}
+	else if (obj1->objType == DO_TRIGGER)
+	{
+		TriggerInfo *tobj1 = *(PolicyInfo *const *) p1;
+		TriggerInfo *tobj2 = *(PolicyInfo *const *) p2;
+
+		/* Sort by table name */
+		cmpval = strcmp(tobj1->tgtable->dobj.name,
+		tobj2->tgtable->dobj.name);
+		if (cmpval != 0)
+			return cmpval;
+	}
 
 	/* Usually shouldn't get here, but if we do, sort by OID */
 	return oidcmp(obj1->catId.oid, obj2->catId.oid);
-- 
2.17.1



Re: [PATCH] Sort policies and triggers by table name in pg_dump.

2019-09-24 Thread Benjie Gillam
> Could you provide a simple example of schema (tables with some
> policies and triggers), with the difference this generates for
> pg_dump, which shows your point?

Certainly; I've attached a bash script that can reproduce the issue
and the diff that it produces, here's the important part:

CREATE TRIGGER a BEFORE INSERT ON foo
FOR EACH ROW EXECUTE PROCEDURE qux();
CREATE POLICY a ON foo FOR SELECT USING (true);

CREATE TRIGGER a BEFORE INSERT ON bar
FOR EACH ROW EXECUTE PROCEDURE qux();
CREATE POLICY a ON bar FOR SELECT USING (true);

Here we create two identically named triggers and two identically
named policies on tables foo and bar. If instead we ran these
statements in a different order (or if the object IDs were to wrap)
the order of the pg_dump would be different even though the
databases are identical other than object IDs. The attached
patch eliminates this difference.

> Your patch has two warnings because you are trying to map a policy
> info pointer to a trigger info pointer:

Ah, thank you for the pointer (aha); I've attached an updated patch
that addresses this copy/paste issue.
From 42c6a84f0e8608b8161ff320628f541f330ac2d0 Mon Sep 17 00:00:00 2001
From: Benjie Gillam 
Date: Mon, 23 Sep 2019 21:18:24 +0100
Subject: [PATCH] Sort policies and triggers by table name in pg_dump.
To: pgsql-hack...@postgresql.org

---
 src/bin/pg_dump/pg_dump_sort.c | 22 ++
 1 file changed, 22 insertions(+)

diff --git a/src/bin/pg_dump/pg_dump_sort.c b/src/bin/pg_dump/pg_dump_sort.c
index 31fc06a255..08ab9c6b95 100644
--- a/src/bin/pg_dump/pg_dump_sort.c
+++ b/src/bin/pg_dump/pg_dump_sort.c
@@ -207,6 +207,28 @@ DOTypeNameCompare(const void *p1, const void *p2)
 		if (cmpval != 0)
 			return cmpval;
 	}
+	else if (obj1->objType == DO_POLICY)
+	{
+		PolicyInfo *pobj1 = *(PolicyInfo *const *) p1;
+		PolicyInfo *pobj2 = *(PolicyInfo *const *) p2;
+
+		/* Sort by table name */
+		cmpval = strcmp(pobj1->poltable->dobj.name,
+		pobj2->poltable->dobj.name);
+		if (cmpval != 0)
+			return cmpval;
+	}
+	else if (obj1->objType == DO_TRIGGER)
+	{
+		TriggerInfo *tobj1 = *(TriggerInfo *const *) p1;
+		TriggerInfo *tobj2 = *(TriggerInfo *const *) p2;
+
+		/* Sort by table name */
+		cmpval = strcmp(tobj1->tgtable->dobj.name,
+		tobj2->tgtable->dobj.name);
+		if (cmpval != 0)
+			return cmpval;
+	}
 
 	/* Usually shouldn't get here, but if we do, sort by OID */
 	return oidcmp(obj1->catId.oid, obj2->catId.oid);
-- 
2.17.1



reproduce.sh
Description: application/shellscript
CREATE FUNCTION
CREATE TABLE
CREATE TABLE
CREATE TRIGGER
CREATE POLICY
CREATE TRIGGER
CREATE POLICY
CREATE FUNCTION
CREATE TABLE
CREATE TABLE
CREATE TRIGGER
CREATE POLICY
CREATE TRIGGER
CREATE POLICY
--- /dev/fd/60	2019-09-24 08:33:03.574700824 +0100
+++ /dev/fd/45	2019-09-24 08:33:03.574700824 +0100
@@ -70,31 +70,31 @@
 
 
 --
--- Name: foo a; Type: TRIGGER; Schema: public; Owner: benjie
+-- Name: bar a; Type: TRIGGER; Schema: public; Owner: benjie
 --
 
-CREATE TRIGGER a BEFORE INSERT ON public.foo FOR EACH ROW EXECUTE PROCEDURE public.qux();
+CREATE TRIGGER a BEFORE INSERT ON public.bar FOR EACH ROW EXECUTE PROCEDURE public.qux();
 
 
 --
--- Name: bar a; Type: TRIGGER; Schema: public; Owner: benjie
+-- Name: foo a; Type: TRIGGER; Schema: public; Owner: benjie
 --
 
-CREATE TRIGGER a BEFORE INSERT ON public.bar FOR EACH ROW EXECUTE PROCEDURE public.qux();
+CREATE TRIGGER a BEFORE INSERT ON public.foo FOR EACH ROW EXECUTE PROCEDURE public.qux();
 
 
 --
--- Name: foo a; Type: POLICY; Schema: public; Owner: benjie
+-- Name: bar a; Type: POLICY; Schema: public; Owner: benjie
 --
 
-CREATE POLICY a ON public.foo FOR SELECT USING (true);
+CREATE POLICY a ON public.bar FOR SELECT USING (true);
 
 
 --
--- Name: bar a; Type: POLICY; Schema: public; Owner: benjie
+-- Name: foo a; Type: POLICY; Schema: public; Owner: benjie
 --
 
-CREATE POLICY a ON public.bar FOR SELECT USING (true);
+CREATE POLICY a ON public.foo FOR SELECT USING (true);
 
 
 --


Re: [PATCH] Sort policies and triggers by table name in pg_dump.

2019-09-25 Thread Benjie Gillam
> Thanks.  Perhaps you could add your patch to the next commit fest
> then at https://commitfest.postgresql.org/25/?

Thanks, submitted.




Re: fix for BUG #3720: wrong results at using ltree

2019-11-19 Thread Benjie Gillam
The following review has been posted through the commitfest application:
make installcheck-world:  tested, passed
Implements feature:   tested, passed
Spec compliant:   not tested
Documentation:tested, failed

This is my first PostgreSQL commitfest and review, guidance welcome.

This patch is straightforward, it applies cleanly, and it includes tests (I've 
also tested the feature manually).

The (existing) documentation states "The length of a label path must be less 
than 65kB," I believe that the 65kB mentioned here should instead be 64kB - 
perhaps the patch could be updated with this single-character fix? At first I 
thought the 65kB limit would be applied to the label path string (e.g. 
'Top.Countries.Europe.Russia' would be 27 bytes), but it seems the limit 
applies to the number of labels in the path - perhaps `kB` is not the right 
measurement here and it should explicitly state 65536?

It is not stated in the documentation what should happen if the label path 
length is greater than 65535, so raising an error makes sense (but may be a 
breaking change).

The new status of this patch is: Waiting on Author