Hi,
On 7/4/19 6:59 AM, Thomas Munro wrote:
For the MIN query you just need a path with Pathkeys: { i ASC, j ASC
}, UniqueKeys: { i, j }, doing the MAX query you just need j DESC.
David, are you thinking about something like the attached ?
Some questions.
* Do you see UniqueKey as a "complete" planner node ?
- I didn't update the nodes/*.c files for this yet
* Is a UniqueKey with a list of EquivalenceClass best, or a list of
UniqueKey with a single EquivalenceClass
Likely more questions around this coming -- should this be a separate
thread ?
Based on this I'll start to update the v21 patch to use UniqueKey, and
post a new version.
While updating the Loose Index Scan wiki page with links to other
products' terminology on this subject, I noticed that MySQL can
skip-scan MIN() and MAX() in the same query. Hmm. That seems quite
desirable. I think it requires a new kind of skipping: I think you
have to be able to skip to the first AND last key that has each
distinct prefix, and then stick a regular agg on top to collapse them
into one row. Such a path would not be so neatly describable by
UniqueKeys, or indeed by the amskip() interface in the current patch.
I mention all this stuff not because I want us to run before we can
walk, but because to be ready to commit the basic distinct skip scan
feature, I think we should know approximately how it'll handle the
future stuff we'll need.
Thomas, do you have any ideas for this ? I can see that MySQL did the
functionality in two change sets (base and function support), but like
you said we shouldn't paint ourselves into a corner.
Feedback greatly appreciated.
Best regards,
Jesper
diff --git a/src/backend/nodes/print.c b/src/backend/nodes/print.c
index 4b9e141404..2e07db2e6e 100644
--- a/src/backend/nodes/print.c
+++ b/src/backend/nodes/print.c
@@ -459,6 +459,44 @@ print_pathkeys(const List *pathkeys, const List *rtable)
printf(")\n");
}
+/*
+ * print_unique_key -
+ * unique_key an UniqueKey
+ */
+void
+print_unique_key(const UniqueKey *unique_key, const List *rtable)
+{
+ ListCell *l;
+
+ printf("(");
+ foreach(l, unique_key->eq_clauses)
+ {
+ EquivalenceClass *eclass = (EquivalenceClass *) lfirst(l);
+ ListCell *k;
+ bool first = true;
+
+ /* chase up */
+ while (eclass->ec_merged)
+ eclass = eclass->ec_merged;
+
+ printf("(");
+ foreach(k, eclass->ec_members)
+ {
+ EquivalenceMember *mem = (EquivalenceMember *)
lfirst(k);
+
+ if (first)
+ first = false;
+ else
+ printf(", ");
+ print_expr((Node *) mem->em_expr, rtable);
+ }
+ printf(")");
+ if (lnext(l))
+ printf(", ");
+ }
+ printf(")\n");
+}
+
/*
* print_tl
* print targetlist in a more legible way.
diff --git a/src/backend/optimizer/path/Makefile
b/src/backend/optimizer/path/Makefile
index 6864a62132..8249a6b6db 100644
--- a/src/backend/optimizer/path/Makefile
+++ b/src/backend/optimizer/path/Makefile
@@ -13,6 +13,6 @@ top_builddir = ../../../..
include $(top_builddir)/src/Makefile.global
OBJS = allpaths.o clausesel.o costsize.o equivclass.o indxpath.o \
- joinpath.o joinrels.o pathkeys.o tidpath.o
+ joinpath.o joinrels.o pathkeys.o tidpath.o uniquekey.o
include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/optimizer/path/allpaths.c
b/src/backend/optimizer/path/allpaths.c
index b7723481b0..a8c8fe8a30 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -3957,6 +3957,14 @@ print_path(PlannerInfo *root, Path *path, int indent)
print_pathkeys(path->pathkeys, root->parse->rtable);
}
+ if (path->unique_key)
+ {
+ for (i = 0; i < indent; i++)
+ printf("\t");
+ printf(" unique_key: ");
+ print_unique_key(path->unique_key, root->parse->rtable);
+ }
+
if (join)
{
JoinPath *jp = (JoinPath *) path;
diff --git a/src/backend/optimizer/path/costsize.c
b/src/backend/optimizer/path/costsize.c
index a2a9b1f7be..dbd0bbf3dc 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -705,6 +705,11 @@ cost_index(IndexPath *path, PlannerInfo *root, double
loop_count,
path->path.parallel_aware = true;
}
+ /* Consider cost based on unique key */
+ if (path->path.unique_key)
+ {
+ }
+
/*
* Now interpolate based on estimated index order correlation to get
total
* disk I/O cost for main table accesses.
diff --git a/src/backend/optimizer/path/uniquekey.c
b/src/backend/optimizer/path/uniquekey.c
new file mode 100644
index 0000000000..b4b9432ce5
--- /dev/null
+++ b/src/backend/optimizer/path/uniquekey.c
@@ -0,0 +1,64 @@
+/*-------------------------------------------------------------------------
+ *
+ * uniquekey.c
+ * Utilities for matching and building unique keys
+ *
+ * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/backend/optimizer/path/uniquekey.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "optimizer/pathnode.h"
+#include "optimizer/paths.h"
+#include "nodes/pg_list.h"
+
+/*
+ * Build a unique key, if the index query has some defined
+ */
+UniqueKey*
+build_index_uniquekey(PlannerInfo *root, List *pathkeys)
+{
+ UniqueKey *unique_key = NULL;
+ ListCell *l;
+
+ if (pathkeys)
+ {
+ /* Find unique keys and add them to the list */
+ foreach(l, pathkeys)
+ {
+ ListCell *k;
+ PathKey *pk = (PathKey *) lfirst(l);
+ EquivalenceClass *ec = (EquivalenceClass *)
pk->pk_eclass;
+
+ while (ec->ec_merged)
+ ec = ec->ec_merged;
+
+ if (root->distinct_pathkeys)
+ {
+ foreach(k, root->distinct_pathkeys)
+ {
+ PathKey *pk = (PathKey *) lfirst(k);
+ EquivalenceClass *dec = pk->pk_eclass;
+
+ while (dec->ec_merged)
+ dec = dec->ec_merged;
+
+ if (ec == dec)
+ {
+ if (!unique_key)
+ unique_key =
makeNode(UniqueKey);
+
+ unique_key->eq_clauses =
lappend(unique_key->eq_clauses, ec);
+ }
+ }
+ }
+ }
+ }
+
+ return unique_key;
+}
diff --git a/src/backend/optimizer/util/pathnode.c
b/src/backend/optimizer/util/pathnode.c
index d884d2bb00..3f3aa6e57c 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -966,6 +966,7 @@ create_seqscan_path(PlannerInfo *root, RelOptInfo *rel,
pathnode->parallel_safe = rel->consider_parallel;
pathnode->parallel_workers = parallel_workers;
pathnode->pathkeys = NIL; /* seqscan has unordered result */
+ pathnode->unique_key = NULL;
cost_seqscan(pathnode, root, rel, pathnode->param_info);
@@ -990,6 +991,7 @@ create_samplescan_path(PlannerInfo *root, RelOptInfo *rel,
Relids required_outer
pathnode->parallel_safe = rel->consider_parallel;
pathnode->parallel_workers = 0;
pathnode->pathkeys = NIL; /* samplescan has unordered result */
+ pathnode->unique_key = NULL;
cost_samplescan(pathnode, root, rel, pathnode->param_info);
@@ -1044,6 +1046,7 @@ create_index_path(PlannerInfo *root,
pathnode->path.parallel_safe = rel->consider_parallel;
pathnode->path.parallel_workers = 0;
pathnode->path.pathkeys = pathkeys;
+ pathnode->path.unique_key = build_index_uniquekey(root, pathkeys);
pathnode->indexinfo = index;
pathnode->indexclauses = indexclauses;
@@ -1087,6 +1090,7 @@ create_bitmap_heap_path(PlannerInfo *root,
pathnode->path.parallel_safe = rel->consider_parallel;
pathnode->path.parallel_workers = parallel_degree;
pathnode->path.pathkeys = NIL; /* always unordered */
+ pathnode->path.unique_key = NULL;
pathnode->bitmapqual = bitmapqual;
@@ -1947,6 +1951,7 @@ create_functionscan_path(PlannerInfo *root, RelOptInfo
*rel,
pathnode->parallel_safe = rel->consider_parallel;
pathnode->parallel_workers = 0;
pathnode->pathkeys = pathkeys;
+ pathnode->unique_key = NULL;
cost_functionscan(pathnode, root, rel, pathnode->param_info);
@@ -1973,6 +1978,7 @@ create_tablefuncscan_path(PlannerInfo *root, RelOptInfo
*rel,
pathnode->parallel_safe = rel->consider_parallel;
pathnode->parallel_workers = 0;
pathnode->pathkeys = NIL; /* result is always unordered */
+ pathnode->unique_key = NULL;
cost_tablefuncscan(pathnode, root, rel, pathnode->param_info);
@@ -1999,6 +2005,7 @@ create_valuesscan_path(PlannerInfo *root, RelOptInfo *rel,
pathnode->parallel_safe = rel->consider_parallel;
pathnode->parallel_workers = 0;
pathnode->pathkeys = NIL; /* result is always unordered */
+ pathnode->unique_key = NULL;
cost_valuesscan(pathnode, root, rel, pathnode->param_info);
@@ -2024,6 +2031,7 @@ create_ctescan_path(PlannerInfo *root, RelOptInfo *rel,
Relids required_outer)
pathnode->parallel_safe = rel->consider_parallel;
pathnode->parallel_workers = 0;
pathnode->pathkeys = NIL; /* XXX for now, result is always
unordered */
+ pathnode->unique_key = NULL;
cost_ctescan(pathnode, root, rel, pathnode->param_info);
@@ -2050,6 +2058,7 @@ create_namedtuplestorescan_path(PlannerInfo *root,
RelOptInfo *rel,
pathnode->parallel_safe = rel->consider_parallel;
pathnode->parallel_workers = 0;
pathnode->pathkeys = NIL; /* result is always unordered */
+ pathnode->unique_key = NULL;
cost_namedtuplestorescan(pathnode, root, rel, pathnode->param_info);
@@ -2076,6 +2085,7 @@ create_resultscan_path(PlannerInfo *root, RelOptInfo *rel,
pathnode->parallel_safe = rel->consider_parallel;
pathnode->parallel_workers = 0;
pathnode->pathkeys = NIL; /* result is always unordered */
+ pathnode->unique_key = NULL;
cost_resultscan(pathnode, root, rel, pathnode->param_info);
@@ -2102,6 +2112,7 @@ create_worktablescan_path(PlannerInfo *root, RelOptInfo
*rel,
pathnode->parallel_safe = rel->consider_parallel;
pathnode->parallel_workers = 0;
pathnode->pathkeys = NIL; /* result is always unordered */
+ pathnode->unique_key = NULL;
/* Cost is the same as for a regular CTE scan */
cost_ctescan(pathnode, root, rel, pathnode->param_info);
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index 4e2fb39105..a9b67c64f8 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -261,6 +261,7 @@ typedef enum NodeTag
T_EquivalenceMember,
T_PathKey,
T_PathTarget,
+ T_UniqueKey,
T_RestrictInfo,
T_IndexClause,
T_PlaceHolderVar,
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 441e64eca9..4ac6207705 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -1071,6 +1071,15 @@ typedef struct ParamPathInfo
List *ppi_clauses; /* join clauses available from outer
rels */
} ParamPathInfo;
+/*
+ * UniqueKey
+ */
+typedef struct UniqueKey
+{
+ NodeTag type;
+
+ List *eq_clauses; /* equivalence class */
+} UniqueKey;
/*
* Type "Path" is used as-is for sequential-scan paths, as well as some other
@@ -1100,6 +1109,9 @@ typedef struct ParamPathInfo
*
* "pathkeys" is a List of PathKey nodes (see above), describing the sort
* ordering of the path's output rows.
+ *
+ * "unique_key", if not NULL, is a UniqueKey node (see above),
+ * describing the XXX.
*/
typedef struct Path
{
@@ -1123,6 +1135,8 @@ typedef struct Path
List *pathkeys; /* sort ordering of path's output */
/* pathkeys is a List of PathKey nodes; see above */
+
+ UniqueKey *unique_key; /* the unique key, or NULL if none */
} Path;
/* Macro for extracting a path's parameterization relids; beware double eval */
diff --git a/src/include/nodes/print.h b/src/include/nodes/print.h
index cbff56a724..4ac359a50f 100644
--- a/src/include/nodes/print.h
+++ b/src/include/nodes/print.h
@@ -15,6 +15,7 @@
#define PRINT_H
#include "executor/tuptable.h"
+#include "nodes/pathnodes.h"
#define nodeDisplay(x) pprint(x)
@@ -28,6 +29,7 @@ extern char *pretty_format_node_dump(const char *dump);
extern void print_rt(const List *rtable);
extern void print_expr(const Node *expr, const List *rtable);
extern void print_pathkeys(const List *pathkeys, const List *rtable);
+extern void print_unique_key(const UniqueKey *unique_key, const List *rtable);
extern void print_tl(const List *tlist, const List *rtable);
extern void print_slot(TupleTableSlot *slot);
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index 7345137d1d..f13d826717 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -235,4 +235,10 @@ extern PathKey *make_canonical_pathkey(PlannerInfo *root,
extern void add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
List
*live_childrels);
+/*
+ * uniquekey.c
+ * Utilities for matching and building a unique key
+ */
+extern UniqueKey *build_index_uniquekey(PlannerInfo *root, List *pathkeys);
+
#endif /* PATHS_H */