jian he <jian.universal...@gmail.com> writes:

Hi jian,

> hi.
> After `git am`, I still cannot build.
>
> ../../Desktop/pg_sources/main/postgres/src/backend/optimizer/path/uniquekey.c:125:45:
> error: variable ‘var’ set but not used
> [-Werror=unused-but-set-variable]
>   125 |                         Var                *var;
>       |                                             ^~~

Thanks for this report, looks clang 11 can't capture this error.  I have
switched to clang 17 which would report this issue at the first place. 

>
> You also need to change src/backend/optimizer/path/meson.build.

Great thanks.

>
> git apply failed.
>
> git am warning:
> Applying: uniquekey on base relation and used it for mark-distinct-as-op.
> .git/rebase-apply/patch:876: new blank line at EOF.
> +
> warning: 1 line adds whitespace errors.
>
> I think you can use `git diff --check`
> (https://git-scm.com/docs/git-diff) to check for whitespace related
> errors.

thanks for the really good suggestion.  Here is the newer version:

>From c2c752a3f66353d7d391976b0152c353a239ce60 Mon Sep 17 00:00:00 2001
From: Andy Fan <zhihui.fan1...@gmail.com>
Date: Tue, 17 Oct 2023 11:06:53 +0800
Subject: [PATCH v2 1/1] uniquekey on base relation and used it for
 mark-distinct-as-noop.

---
 src/backend/nodes/list.c                |  17 ++
 src/backend/optimizer/path/Makefile     |   3 +-
 src/backend/optimizer/path/allpaths.c   |   2 +
 src/backend/optimizer/path/equivclass.c |  48 +++
 src/backend/optimizer/path/uniquekey.c  | 384 ++++++++++++++++++++++++
 src/backend/optimizer/plan/initsplan.c  |   8 +
 src/backend/optimizer/plan/planner.c    |  33 ++
 src/backend/optimizer/util/plancat.c    |  10 +
 src/backend/optimizer/util/relnode.c    |   2 +
 src/include/nodes/pathnodes.h           |  19 ++
 src/include/nodes/pg_list.h             |   2 +
 src/include/optimizer/paths.h           |  10 +
 src/test/regress/expected/uniquekey.out |  69 +++++
 src/test/regress/parallel_schedule      |   2 +-
 src/test/regress/sql/uniquekey.sql      |  28 ++
 15 files changed, 635 insertions(+), 2 deletions(-)
 create mode 100644 src/backend/optimizer/path/uniquekey.c
 create mode 100644 src/test/regress/expected/uniquekey.out
 create mode 100644 src/test/regress/sql/uniquekey.sql

diff --git a/src/backend/nodes/list.c b/src/backend/nodes/list.c
index 750ee5a7e55..eaeaa19ad2d 100644
--- a/src/backend/nodes/list.c
+++ b/src/backend/nodes/list.c
@@ -694,6 +694,23 @@ list_member_ptr(const List *list, const void *datum)
 	return false;
 }
 
+/*
+ * Return index of the datum in list if found. otherwise return -1.
+ */
+int
+list_member_ptr_pos(const List *list, const void *datum)
+{
+	ListCell   *lc;
+
+	foreach(lc, list)
+	{
+		if (lfirst(lc) == datum)
+			return foreach_current_index(lc);
+	}
+
+	return -1;
+}
+
 /*
  * Return true iff the integer 'datum' is a member of the list.
  */
diff --git a/src/backend/optimizer/path/Makefile b/src/backend/optimizer/path/Makefile
index 1e199ff66f7..63cc1505d96 100644
--- a/src/backend/optimizer/path/Makefile
+++ b/src/backend/optimizer/path/Makefile
@@ -21,6 +21,7 @@ OBJS = \
 	joinpath.o \
 	joinrels.o \
 	pathkeys.o \
-	tidpath.o
+	tidpath.o \
+	uniquekey.o
 
 include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index 3cda88e3333..815a74a9dfa 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -458,6 +458,8 @@ set_rel_size(PlannerInfo *root, RelOptInfo *rel,
 		}
 	}
 
+	populate_baserel_uniquekeys(root, rel);
+
 	/*
 	 * We insist that all non-dummy rels have a nonzero rowcount estimate.
 	 */
diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index 7fa502d6e25..29edc8d1b5a 100644
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -744,6 +744,54 @@ get_eclass_for_sort_expr(PlannerInfo *root,
 	return newec;
 }
 
+/*
+ * find_ec_position_matching_expr
+ *		Locate the position of EquivalenceClass whose members matching
+ *		the given expr, if any; return -1 if no match.
+ */
+int
+find_ec_position_matching_expr(PlannerInfo *root,
+							   Expr *expr,
+							   RelOptInfo *baserel)
+{
+	int			i = -1;
+
+	while ((i = bms_next_member(baserel->eclass_indexes, i)) >= 0)
+	{
+		EquivalenceClass *ec = list_nth(root->eq_classes, i);
+
+		if (find_ec_member_matching_expr(ec, expr, baserel->relids))
+			return i;
+	}
+	return -1;
+}
+
+/*
+ * build_ec_positions_for_exprs
+ *
+ *		Given a list of exprs, find the related EC positions for each of
+ *		them. if any exprs has no EC related, return NULL;
+ */
+Bitmapset *
+build_ec_positions_for_exprs(PlannerInfo *root, List *exprs, RelOptInfo *rel)
+{
+	ListCell   *lc;
+	Bitmapset  *res = NULL;
+
+	foreach(lc, exprs)
+	{
+		int			pos = find_ec_position_matching_expr(root, lfirst(lc), rel);
+
+		if (pos < 0)
+		{
+			bms_free(res);
+			return NULL;
+		}
+		res = bms_add_member(res, pos);
+	}
+	return res;
+}
+
 /*
  * find_ec_member_matching_expr
  *		Locate an EquivalenceClass member matching the given expr, if any;
diff --git a/src/backend/optimizer/path/uniquekey.c b/src/backend/optimizer/path/uniquekey.c
new file mode 100644
index 00000000000..d9e839286dd
--- /dev/null
+++ b/src/backend/optimizer/path/uniquekey.c
@@ -0,0 +1,384 @@
+/*-------------------------------------------------------------------------
+ *
+ * uniquekey.c
+ *	  Utilities for maintaining uniquekey.
+ *
+ *
+ * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ *	  src/backend/optimizer/path/uniquekey.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/sysattr.h"
+#include "nodes/nodeFuncs.h"
+#include "nodes/pathnodes.h"
+#include "optimizer/optimizer.h"
+#include "optimizer/paths.h"
+
+
+/* Functions to populate UniqueKey */
+static bool add_uniquekey_for_uniqueindex(PlannerInfo *root,
+										  IndexOptInfo *unique_index,
+										  List *truncatable_exprs,
+										  List *expr_opfamilies);
+static bool unique_ecs_useful_for_distinct(PlannerInfo *root, Bitmapset *ec_indexes);
+
+/* Helper functions to create UniqueKey. */
+static UniqueKey * make_uniquekey(Bitmapset *eclass_indexes,
+								  bool useful_for_distinct);
+static void mark_rel_singlerow(RelOptInfo *rel, int relid);
+
+static UniqueKey * rel_singlerow_uniquekey(RelOptInfo *rel);
+static bool uniquekey_contains_multinulls(PlannerInfo *root, RelOptInfo *rel, UniqueKey * ukey);
+
+/* Debug only */
+static void print_uniquekey(PlannerInfo *root, RelOptInfo *rel);
+
+
+/*
+ * populate_baserel_uniquekeys
+ *
+ *		UniqueKey on baserel comes from unique indexes. Any expression
+ * which equals with Const can be stripped and the left expression are
+ * still unique.
+ */
+void
+populate_baserel_uniquekeys(PlannerInfo *root, RelOptInfo *rel)
+{
+	ListCell   *lc;
+	List	   *truncatable_exprs = NIL,
+			   *expr_opfamilies = NIL;
+
+	foreach(lc, rel->baserestrictinfo)
+	{
+		RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
+
+		if (rinfo->mergeopfamilies == NIL)
+			continue;
+
+		if (!IsA(rinfo->clause, OpExpr))
+			continue;
+
+		if (bms_is_empty(rinfo->left_relids))
+			truncatable_exprs = lappend(truncatable_exprs, get_rightop(rinfo->clause));
+		else if (bms_is_empty(rinfo->right_relids))
+			truncatable_exprs = lappend(truncatable_exprs, get_leftop(rinfo->clause));
+		else
+			continue;
+
+		expr_opfamilies = lappend(expr_opfamilies, rinfo->mergeopfamilies);
+	}
+
+	foreach(lc, rel->indexlist)
+	{
+		IndexOptInfo *index = (IndexOptInfo *) lfirst(lc);
+
+		if (!index->unique || !index->immediate ||
+			(index->indpred != NIL && !index->predOK))
+			continue;
+
+		if (add_uniquekey_for_uniqueindex(root, index,
+										  truncatable_exprs,
+										  expr_opfamilies))
+			/* Find a singlerow case, no need to go through other indexes. */
+			return;
+	}
+
+	print_uniquekey(root, rel);
+}
+
+
+/*
+ * add_uniquekey_for_uniqueindex
+ *
+ *		 populate a UniqueKey if it is interesting, return true iff the
+ * UniqueKey is an SingleRow. Only the interesting UniqueKeys are kept.
+ */
+static bool
+add_uniquekey_for_uniqueindex(PlannerInfo *root, IndexOptInfo *unique_index,
+							  List *truncatable_exprs, List *expr_opfamilies)
+{
+	List	   *unique_exprs = NIL;
+	Bitmapset  *unique_ecs = NULL;
+	ListCell   *indexpr_item;
+	RelOptInfo *rel = unique_index->rel;
+	bool		used_for_distinct;
+	int			c;
+
+	indexpr_item = list_head(unique_index->indexprs);
+
+	for (c = 0; c < unique_index->nkeycolumns; c++)
+	{
+		int			attr = unique_index->indexkeys[c];
+		Expr	   *expr;		/* The candidate for UniqueKey expression. */
+		bool		matched_const = false;
+		ListCell   *lc1,
+				   *lc2;
+
+		if (attr > 0)
+		{
+			expr = list_nth_node(TargetEntry, unique_index->indextlist, c)->expr;
+		}
+		else if (attr == 0)
+		{
+			/* Expression index */
+			expr = lfirst(indexpr_item);
+			indexpr_item = lnext(unique_index->indexprs, indexpr_item);
+		}
+		else					/* attr < 0 */
+		{
+			/* Index on OID is possible, not handle it for now. */
+			return false;
+		}
+
+		/* Ignore the expr which are equals to const. */
+		forboth(lc1, truncatable_exprs, lc2, expr_opfamilies)
+		{
+			if (list_member_oid((List *) lfirst(lc2), unique_index->opfamily[c]) &&
+				match_index_to_operand((Node *) lfirst(lc1), c, unique_index))
+			{
+				matched_const = true;
+				break;
+			}
+		}
+
+		if (matched_const)
+			continue;
+
+		unique_exprs = lappend(unique_exprs, expr);
+	}
+
+	if (unique_exprs == NIL)
+	{
+		/* single row is always interesting. */
+		mark_rel_singlerow(rel, rel->relid);
+		return true;
+	}
+
+	/*
+	 * if no EquivalenceClass is found for any exprs in unique exprs, we are
+	 * sure the whole exprs are not in the DISTINCT clause or mergeable join
+	 * clauses. so it is not interesting.
+	 */
+	unique_ecs = build_ec_positions_for_exprs(root, unique_exprs, rel);
+	if (unique_ecs == NULL)
+		return false;
+
+	used_for_distinct = unique_ecs_useful_for_distinct(root, unique_ecs);
+
+
+	rel->uniquekeys = lappend(rel->uniquekeys,
+							  make_uniquekey(unique_ecs,
+											 used_for_distinct));
+	return false;
+}
+
+/*
+ *	make_uniquekey
+ *		Based on UnqiueKey rules, it is impossible for a UnqiueKey
+ * which have eclass_indexes and relid both set. This function just
+ * handle eclass_indexes case.
+ */
+static UniqueKey *
+make_uniquekey(Bitmapset *eclass_indexes, bool useful_for_distinct)
+{
+	UniqueKey  *ukey = makeNode(UniqueKey);
+
+	ukey->eclass_indexes = eclass_indexes;
+	ukey->relid = 0;
+	ukey->use_for_distinct = useful_for_distinct;
+	return ukey;
+}
+
+/*
+ * mark_rel_singlerow
+ *	mark a relation as singlerow.
+ */
+static void
+mark_rel_singlerow(RelOptInfo *rel, int relid)
+{
+	UniqueKey  *ukey = makeNode(UniqueKey);
+
+	ukey->relid = relid;
+	rel->uniquekeys = list_make1(ukey);
+}
+
+/*
+ *
+ *	Return the UniqueKey if rel is a singlerow Relation. othwise
+ * return NULL.
+ */
+static UniqueKey *
+rel_singlerow_uniquekey(RelOptInfo *rel)
+{
+	if (rel->uniquekeys != NIL)
+	{
+		UniqueKey  *ukey = linitial_node(UniqueKey, rel->uniquekeys);
+
+		if (ukey->relid)
+			return ukey;
+	}
+	return NULL;
+}
+
+/*
+ * print_uniquekey
+ *	Used for easier reivew, should be removed before commit.
+ */
+static void
+print_uniquekey(PlannerInfo *root, RelOptInfo *rel)
+{
+	if (!enable_geqo)
+	{
+		ListCell   *lc;
+
+		elog(INFO, "Rel = %s", bmsToString(rel->relids));
+		foreach(lc, rel->uniquekeys)
+		{
+			UniqueKey  *ukey = lfirst_node(UniqueKey, lc);
+
+			elog(INFO, "UNIQUEKEY{indexes=%s, singlerow_rels=%d, use_for_distinct=%d}",
+				 bmsToString(ukey->eclass_indexes),
+				 ukey->relid,
+				 ukey->use_for_distinct);
+		}
+	}
+}
+
+/*
+ *	is it possible that the var contains multi NULL values in the given
+ * RelOptInfo rel?
+ */
+static bool
+var_is_nullable(PlannerInfo *root, Var *var, RelOptInfo *rel)
+{
+	RelOptInfo *base_rel;
+
+	/* check if the outer join can add the NULL values.  */
+	if (bms_overlap(var->varnullingrels, rel->relids))
+		return true;
+
+	/* check if the user data has the NULL values. */
+	base_rel = root->simple_rel_array[var->varno];
+	return !bms_is_member(var->varattno - FirstLowInvalidHeapAttributeNumber, base_rel->notnullattrs);
+}
+
+
+/*
+ * uniquekey_contains_multinulls
+ *
+ *	Check if the uniquekey contains nulls values.
+ */
+static bool
+uniquekey_contains_multinulls(PlannerInfo *root, RelOptInfo *rel, UniqueKey * ukey)
+{
+	int			i = -1;
+
+	while ((i = bms_next_member(rel->eclass_indexes, i)) >= 0)
+	{
+		EquivalenceClass *ec = list_nth_node(EquivalenceClass, root->eq_classes, i);
+		ListCell   *lc;
+
+		foreach(lc, ec->ec_members)
+		{
+			EquivalenceMember *em = lfirst_node(EquivalenceMember, lc);
+			Var		   *var;
+
+			var = (Var *) em->em_expr;
+
+			if (!IsA(var, Var))
+				continue;
+
+			if (var_is_nullable(root, var, rel))
+				return true;
+			else
+
+				/*
+				 * If any one of member in the EC is not nullable, we all the
+				 * members are not nullable since they are equal with each
+				 * other.
+				 */
+				break;
+		}
+	}
+
+	return false;
+}
+
+
+/*
+ * relation_is_distinct_for
+ *
+ * Check if the rel is distinct for distinct_pathkey.
+ */
+bool
+relation_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *distinct_pathkey)
+{
+	ListCell   *lc;
+	UniqueKey  *singlerow_ukey = rel_singlerow_uniquekey(rel);
+	Bitmapset  *pathkey_bm = NULL;
+
+	if (singlerow_ukey)
+	{
+		return !uniquekey_contains_multinulls(root, rel, singlerow_ukey);
+	}
+
+	foreach(lc, distinct_pathkey)
+	{
+		PathKey    *pathkey = lfirst_node(PathKey, lc);
+		int			pos = list_member_ptr_pos(root->eq_classes, pathkey->pk_eclass);
+
+		if (pos == -1)
+			return false;
+
+		pathkey_bm = bms_add_member(pathkey_bm, pos);
+	}
+
+	foreach(lc, rel->uniquekeys)
+	{
+		UniqueKey  *ukey = lfirst_node(UniqueKey, lc);
+
+		if (bms_is_subset(ukey->eclass_indexes, pathkey_bm) &&
+			!uniquekey_contains_multinulls(root, rel, ukey))
+			return true;
+	}
+
+	return false;
+}
+
+/*
+ * unique_ecs_useful_for_distinct
+ *
+ *	Return true if all the EquivalenceClass for ecs exists in root->distinct_pathkey.
+ */
+static bool
+unique_ecs_useful_for_distinct(PlannerInfo *root, Bitmapset *ec_indexes)
+{
+	int			i = -1;
+
+	while ((i = bms_next_member(ec_indexes, i)) >= 0)
+	{
+		EquivalenceClass *ec = list_nth(root->eq_classes, i);
+		ListCell   *p;
+		bool		found = false;
+
+		foreach(p, root->distinct_pathkeys)
+		{
+			PathKey    *pathkey = lfirst_node(PathKey, p);
+
+			if (ec == pathkey->pk_eclass)
+			{
+				found = true;
+				break;
+			}
+		}
+		if (!found)
+			return false;
+	}
+	return true;
+}
diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c
index b31d8921211..1ed02d6eb2e 100644
--- a/src/backend/optimizer/plan/initsplan.c
+++ b/src/backend/optimizer/plan/initsplan.c
@@ -2647,6 +2647,14 @@ distribute_restrictinfo_to_rels(PlannerInfo *root,
 			/* Add clause to rel's restriction list */
 			rel->baserestrictinfo = lappend(rel->baserestrictinfo,
 											restrictinfo);
+			{
+				List	   *not_null_vars = find_nonnullable_vars((Node *) restrictinfo->clause);
+
+				if (not_null_vars != NIL)
+					rel->notnullattrs = bms_union(rel->notnullattrs,
+												  list_nth(not_null_vars, rel->relid));
+			}
+
 			/* Update security level info */
 			rel->baserestrict_min_security = Min(rel->baserestrict_min_security,
 												 restrictinfo->security_level);
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index aafef24cf14..31ae33e5062 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -1663,9 +1663,12 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 												gset_data);
 			/* Fix things up if grouping_target contains SRFs */
 			if (parse->hasTargetSRFs)
+			{
 				adjust_paths_for_srfs(root, current_rel,
 									  grouping_targets,
 									  grouping_targets_contain_srfs);
+				current_rel->uniquekeys = NIL;
+			}
 		}
 
 		/*
@@ -1725,6 +1728,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 	 * Now we are prepared to build the final-output upperrel.
 	 */
 	final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
+	/* simple_copy_uniquekeys(final_rel, current_rel); */
 
 	/*
 	 * If the input rel is marked consider_parallel and there's nothing that's
@@ -4049,6 +4053,22 @@ create_ordinary_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel,
 									  gd,
 									  extra->targetList);
 
+	if (root->parse->groupingSets)
+	{
+		/* nothing to do */
+	}
+	else if (root->parse->groupClause && root->group_pathkeys != NIL)
+	{
+		/*
+		 * populate_uniquekeys_from_pathkeys(root, grouped_rel,
+		 * root->group_pathkeys);
+		 */
+	}
+	else
+	{
+		/* SingleRow Case */
+	}
+
 	/* Build final grouping paths */
 	add_paths_to_grouping_rel(root, input_rel, grouped_rel,
 							  partially_grouped_rel, agg_costs, gd,
@@ -4668,9 +4688,22 @@ create_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel)
 {
 	RelOptInfo *distinct_rel;
 
+	/*
+	 * distinct_pathkeys may be NIL if it distinctClause is not sortable. XXX:
+	 * What should we do for the else?
+	 */
+	if (root->distinct_pathkeys &&
+		relation_is_distinct_for(root, input_rel, root->distinct_pathkeys))
+		return input_rel;
+
 	/* For now, do all work in the (DISTINCT, NULL) upperrel */
 	distinct_rel = fetch_upper_rel(root, UPPERREL_DISTINCT, NULL);
 
+	/*
+	 * populate_uniquekeys_from_pathkeys(root, distinct_rel,
+	 * root->distinct_pathkeys);
+	 */
+
 	/*
 	 * We don't compute anything at this level, so distinct_rel will be
 	 * parallel-safe if the input rel is parallel-safe.  In particular, if
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index 7159c775fbd..665d3e890cf 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -121,6 +121,7 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
 	Relation	relation;
 	bool		hasindex;
 	List	   *indexinfos = NIL;
+	Index		i;
 
 	/*
 	 * We need not lock the relation since it was already locked, either by
@@ -175,6 +176,15 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
 	/* Retrieve the parallel_workers reloption, or -1 if not set. */
 	rel->rel_parallel_workers = RelationGetParallelWorkers(relation, -1);
 
+	for (i = 0; i < relation->rd_att->natts; i++)
+	{
+		FormData_pg_attribute attr = relation->rd_att->attrs[i];
+
+		if (attr.attnotnull)
+			rel->notnullattrs = bms_add_member(rel->notnullattrs,
+											   attr.attnum - FirstLowInvalidHeapAttributeNumber);
+	}
+
 	/*
 	 * Make list of indexes.  Ignore indexes on system catalogs if told to.
 	 * Don't bother with indexes from traditional inheritance parents.  For
diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c
index 5d83f60eb9a..2c9e1fe0164 100644
--- a/src/backend/optimizer/util/relnode.c
+++ b/src/backend/optimizer/util/relnode.c
@@ -284,6 +284,7 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent)
 	rel->all_partrels = NULL;
 	rel->partexprs = NULL;
 	rel->nullable_partexprs = NULL;
+	rel->uniquekeys = NIL;
 
 	/*
 	 * Pass assorted information down the inheritance hierarchy.
@@ -745,6 +746,7 @@ build_join_rel(PlannerInfo *root,
 	joinrel->all_partrels = NULL;
 	joinrel->partexprs = NULL;
 	joinrel->nullable_partexprs = NULL;
+	joinrel->uniquekeys = NIL;
 
 	/* Compute information relevant to the foreign relations. */
 	set_foreign_rel_properties(joinrel, outer_rel, inner_rel);
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 5702fbba60c..0b9e3fe1293 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -876,6 +876,7 @@ typedef struct RelOptInfo
 	 * Vars/Exprs, cost, width
 	 */
 	struct PathTarget *reltarget;
+	List	   *uniquekeys;		/* A list of UniqueKey. */
 
 	/*
 	 * materialization information
@@ -913,6 +914,9 @@ typedef struct RelOptInfo
 	Relids	   *attr_needed pg_node_attr(read_write_ignore);
 	/* array indexed [min_attr .. max_attr] */
 	int32	   *attr_widths pg_node_attr(read_write_ignore);
+
+	/* The not null attrs from catalogs or baserestrictinfo. */
+	Bitmapset  *notnullattrs;
 	/* relids of outer joins that can null this baserel */
 	Relids		nulling_relids;
 	/* LATERAL Vars and PHVs referenced by rel */
@@ -1456,6 +1460,21 @@ typedef struct PathKey
 	bool		pk_nulls_first; /* do NULLs come before normal values? */
 } PathKey;
 
+
+typedef struct UniqueKey
+{
+	pg_node_attr(no_read, no_query_jumble)
+
+	NodeTag		type;
+	Bitmapset  *eclass_indexes; /* Indexes in PlannerInfo's eq_class list of
+								 * ECs that is unique for a RelOptInfo. */
+	int			relid;
+	bool		use_for_distinct;	/* true if it is used in distinct-pathkey,
+									 * in this case we would never check if we
+									 * should discard it during join search. */
+}			UniqueKey;
+
+
 /*
  * VolatileFunctionStatus -- allows nodes to cache their
  * contain_volatile_functions properties. VOLATILITY_UNKNOWN means not yet
diff --git a/src/include/nodes/pg_list.h b/src/include/nodes/pg_list.h
index 529a382d284..1be53fe7f76 100644
--- a/src/include/nodes/pg_list.h
+++ b/src/include/nodes/pg_list.h
@@ -581,6 +581,8 @@ extern bool list_member_int(const List *list, int datum);
 extern bool list_member_oid(const List *list, Oid datum);
 extern bool list_member_xid(const List *list, TransactionId datum);
 
+extern int	list_member_ptr_pos(const List *list, const void *datum);
+
 extern pg_nodiscard List *list_delete(List *list, void *datum);
 extern pg_nodiscard List *list_delete_ptr(List *list, void *datum);
 extern pg_nodiscard List *list_delete_int(List *list, int datum);
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index 7b896d821e8..3ac25d47ae7 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -136,11 +136,17 @@ extern EquivalenceClass *get_eclass_for_sort_expr(PlannerInfo *root,
 extern EquivalenceMember *find_ec_member_matching_expr(EquivalenceClass *ec,
 													   Expr *expr,
 													   Relids relids);
+extern int	find_ec_position_matching_expr(PlannerInfo *root,
+										   Expr *expr,
+										   RelOptInfo *rel);
 extern EquivalenceMember *find_computable_ec_member(PlannerInfo *root,
 													EquivalenceClass *ec,
 													List *exprs,
 													Relids relids,
 													bool require_parallel_safe);
+extern Bitmapset *build_ec_positions_for_exprs(PlannerInfo *root,
+											   List *exprs,
+											   RelOptInfo *rel);
 extern bool relation_can_be_sorted_early(PlannerInfo *root, RelOptInfo *rel,
 										 EquivalenceClass *ec,
 										 bool require_parallel_safe);
@@ -259,4 +265,8 @@ extern PathKey *make_canonical_pathkey(PlannerInfo *root,
 extern void add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
 									List *live_childrels);
 
+extern bool relation_is_distinct_for(PlannerInfo *root, RelOptInfo *rel,
+									 List *distinct_pathkey);
+extern void populate_baserel_uniquekeys(PlannerInfo *root,
+										RelOptInfo *baserel);
 #endif							/* PATHS_H */
diff --git a/src/test/regress/expected/uniquekey.out b/src/test/regress/expected/uniquekey.out
new file mode 100644
index 00000000000..43b876edd1a
--- /dev/null
+++ b/src/test/regress/expected/uniquekey.out
@@ -0,0 +1,69 @@
+CREATE TABLE uk_t (id int primary key, a int not null, b int not null, c int, d int, e int);
+explain (costs off)
+select distinct id from uk_t;
+    QUERY PLAN    
+------------------
+ Seq Scan on uk_t
+(1 row)
+
+explain (costs off)
+select distinct e from uk_t where id = e;
+     QUERY PLAN     
+--------------------
+ Seq Scan on uk_t
+   Filter: (id = e)
+(2 rows)
+
+create unique index on uk_t (a, b);
+create unique index on uk_t (c, d);
+explain (costs off)
+select distinct a, b from uk_t;
+    QUERY PLAN    
+------------------
+ Seq Scan on uk_t
+(1 row)
+
+explain (costs off)
+select distinct c, d from uk_t;
+       QUERY PLAN       
+------------------------
+ HashAggregate
+   Group Key: c, d
+   ->  Seq Scan on uk_t
+(3 rows)
+
+explain (costs off)
+select distinct c, d from uk_t
+where c > 0 and d > 0;
+                QUERY PLAN                 
+-------------------------------------------
+ Bitmap Heap Scan on uk_t
+   Recheck Cond: ((c > 0) AND (d > 0))
+   ->  Bitmap Index Scan on uk_t_c_d_idx
+         Index Cond: ((c > 0) AND (d > 0))
+(4 rows)
+
+explain (costs off)
+select distinct d from uk_t
+where c > 1 and d > 0;
+                   QUERY PLAN                    
+-------------------------------------------------
+ HashAggregate
+   Group Key: d
+   ->  Bitmap Heap Scan on uk_t
+         Recheck Cond: ((c > 1) AND (d > 0))
+         ->  Bitmap Index Scan on uk_t_c_d_idx
+               Index Cond: ((c > 1) AND (d > 0))
+(6 rows)
+
+explain (costs off)
+select distinct d from uk_t
+where c = 1 and d > 0;
+                QUERY PLAN                 
+-------------------------------------------
+ Bitmap Heap Scan on uk_t
+   Recheck Cond: ((c = 1) AND (d > 0))
+   ->  Bitmap Index Scan on uk_t_c_d_idx
+         Index Cond: ((c = 1) AND (d > 0))
+(4 rows)
+
diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule
index 4df9d8503b9..a6531a1f3af 100644
--- a/src/test/regress/parallel_schedule
+++ b/src/test/regress/parallel_schedule
@@ -62,7 +62,7 @@ test: sanity_check
 # join depends on create_misc
 # ----------
 test: select_into select_distinct select_distinct_on select_implicit select_having subselect union case join aggregates transactions random portals arrays btree_index hash_index update delete namespace prepared_xacts
-
+test: uniquekey
 # ----------
 # Another group of parallel tests
 # ----------
diff --git a/src/test/regress/sql/uniquekey.sql b/src/test/regress/sql/uniquekey.sql
new file mode 100644
index 00000000000..7d02eb8ee3c
--- /dev/null
+++ b/src/test/regress/sql/uniquekey.sql
@@ -0,0 +1,28 @@
+CREATE TABLE uk_t (id int primary key, a int not null, b int not null, c int, d int, e int);
+
+explain (costs off)
+select distinct id from uk_t;
+
+explain (costs off)
+select distinct e from uk_t where id = e;
+
+create unique index on uk_t (a, b);
+create unique index on uk_t (c, d);
+
+explain (costs off)
+select distinct a, b from uk_t;
+
+explain (costs off)
+select distinct c, d from uk_t;
+
+explain (costs off)
+select distinct c, d from uk_t
+where c > 0 and d > 0;
+
+explain (costs off)
+select distinct d from uk_t
+where c > 1 and d > 0;
+
+explain (costs off)
+select distinct d from uk_t
+where c = 1 and d > 0;
-- 
2.21.0


-- 
Best Regards
Andy Fan

Reply via email to