On 11/17/21 15:45, Thomas wrote:
Thank you for the feedback and sorry for the oversight. I fixed the bug
and attached a new version of the patch.
Kind Regards, Thomas
Am Mi., 17. Nov. 2021 um 15:03 Uhr schrieb Daniel Gustafsson
<dan...@yesql.se <mailto:dan...@yesql.se>>:
This patch fails to compile due to an incorrect function name in an
assertion:
nodeMergejoin.c:297:9: warning: implicit declaration of function
'list_legth' is invalid in C99 [-Wimplicit-function-declaration]
Assert(list_legth(node->rangeclause) < 3);
That still doesn't compile with asserts, because MJCreateRangeData has
Assert(list_length(node->rangeclause) < 3);
but there's no 'node' variable :-/
I took a brief look at the patch, and I think there are two main issues
preventing it from moving forward.
1) no tests
There's not a *single* regression test exercising the new code, so even
after adding Assert(false) to MJCreateRangeData() tests pass just fine.
Clearly, that needs to change.
2) lack of comments
The patch adds a bunch of functions, but it does not really explain what
the functions do (unlike the various surrounding functions). Even if I
can work out what the functions do, it's much harder to determine what
the "contract" is (i.e. what assumptions the function do and what is
guaranteed).
Similarly, the patch modifies/reworks large blocks of executor code,
without updating the comments describing what the block does.
See 0002 for various places that I think are missing comments.
Aside from that, I have a couple minor comments:
3) I'm not quite sure I like "Range Merge Join" to be honest. It's still
a "Merge Join" pretty much. What about ditching the "Range"? There'll
still be "Range Cond" key, which should be good enough I think.
4) Some minor whitespace issues (tabs vs. spaces). See 0002.
regards
--
Tomas Vondra
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company
From 1bc7921b4ad51cf23723c2ad7c7f618cde6fcba3 Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tomas.von...@postgresql.org>
Date: Wed, 17 Nov 2021 22:52:31 +0100
Subject: [PATCH 2/2] review
---
.gitignore | 1 +
src/backend/commands/explain.c | 5 +++++
src/backend/executor/nodeMergejoin.c | 24 +++++++++++++++++++++---
src/backend/optimizer/path/costsize.c | 8 ++++----
src/backend/optimizer/path/joinpath.c | 9 +++++++++
src/backend/optimizer/path/pathkeys.c | 8 ++++++++
src/backend/utils/adt/rangetypes.c | 4 ++++
src/backend/utils/cache/lsyscache.c | 2 ++
src/include/nodes/pathnodes.h | 4 ++--
9 files changed, 56 insertions(+), 9 deletions(-)
diff --git a/.gitignore b/.gitignore
index 5dca1a39ec..23a555185c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -44,6 +44,7 @@ lib*.pc
/tmp_install/
# Thomas
+# XXX Shouldn't be in the patch.
/data/
/server/
.idea/
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 9b7503630c..3610048c90 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -1206,6 +1206,11 @@ ExplainNode(PlanState *planstate, List *ancestors,
pname = sname = "Nested Loop";
break;
case T_MergeJoin:
+ /*
+ * XXX Not sure we want to add another top-level node (Merge vs. Range Merge).
+ *
+ * I'd use Merge for both and just differentiate them using Range Cond.
+ */
if(((MergeJoin *) plan)->rangeclause)
{
pname = "Range Merge"; /* "Join" gets added by jointype switch */
diff --git a/src/backend/executor/nodeMergejoin.c b/src/backend/executor/nodeMergejoin.c
index 09131479b1..d7f9ba809e 100644
--- a/src/backend/executor/nodeMergejoin.c
+++ b/src/backend/executor/nodeMergejoin.c
@@ -145,7 +145,9 @@ typedef struct MergeJoinClauseData
} MergeJoinClauseData;
/*
- * Runtime data for the range clause
+ * Runtime data for the range clause.
+ *
+ * XXX This really needs some comments, explaining what the fields are for.
*/
typedef struct RangeJoinData
{
@@ -153,7 +155,6 @@ typedef struct RangeJoinData
ExprState *endClause;
ExprState *rangeExpr;
ExprState *elemExpr;
-
} RangeJoinData;
/* Result type for MJEvalOuterValues and MJEvalInnerValues */
@@ -294,10 +295,14 @@ MJCreateRangeData(List *rangeclause,
{
RangeData data;
- Assert(list_length(node->rangeclause) < 3);
+ /* XXX how come this does not crash anything? */
+ Assert(false);
+ // FIXME, there's no 'node' variable
+ // Assert(list_length(node->rangeclause) < 3);
data = (RangeData) palloc0(sizeof(RangeJoinData));
+ /* XXX useless, thanks to the palloc0 */
data->startClause = NULL;
data->endClause = NULL;
data->rangeExpr = NULL;
@@ -518,6 +523,10 @@ MJCompare(MergeJoinState *mergestate)
* Compare the rangejoinable values of the current two input tuples
* and return 0 if they are equal (ie, the outer interval contains the inner),
* >0 if outer > inner, <0 if outer < inner.
+ *
+ * XXX So this is essentially a simple comparator function, except that it
+ * also deals with ranges. That'd deserve explanation how clauses with ranges
+ * works, I guess.
*/
static int
MJCompareRange(MergeJoinState *mergestate)
@@ -1028,6 +1037,9 @@ ExecMergeJoin(PlanState *pstate)
compareResult = MJCompare(node);
MJ_DEBUG_COMPARE(compareResult);
+ /*
+ * XXX Incomprehensible. Maybe add some comments?
+ */
if(compareResult == 0)
{
if(isRangeJoin)
@@ -1237,6 +1249,9 @@ ExecMergeJoin(PlanState *pstate)
/* we need not do MJEvalInnerValues again */
}
+ /*
+ * Comments?
+ */
if(isRangeJoin)
{
compareRangeResult = MJCompareRange(node);
@@ -1348,6 +1363,9 @@ ExecMergeJoin(PlanState *pstate)
if (compareResult == 0)
{
+ /*
+ * XXX Comments?
+ */
if(isRangeJoin)
{
compareRangeResult = MJCompareRange(node);
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 041331d6bf..835944661b 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -3436,12 +3436,12 @@ final_cost_mergejoin(PlannerInfo *root, MergePath *path,
rescannedtuples;
double rescanratio;
+ /* XXX ??? */
allclauses = list_concat(list_copy(mergeclauses), path->path_rangeclause);
-
- /* Protect some assumptions below that rowcounts aren't zero */
- if (inner_path_rows <= 0)
- inner_path_rows = 1;
+ /* Protect some assumptions below that rowcounts aren't zero */
+ if (inner_path_rows <= 0)
+ inner_path_rows = 1;
/* Mark the path with the correct row estimate */
if (path->jpath.path.param_info)
diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c
index 4777e0a850..3f93f994a2 100644
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -222,6 +222,7 @@ add_paths_to_joinrel(PlannerInfo *root,
jointype,
&mergejoin_allowed);
+ /* XXX Why only for inner/left joins? */
if (jointype == JOIN_INNER || jointype == JOIN_LEFT)
extra.rangeclause_list = select_rangejoin_clauses(outerrel,
innerrel,
@@ -1238,6 +1239,9 @@ sort_inner_and_outer(PlannerInfo *root,
* The pathkey order returned by select_outer_pathkeys_for_merge() has
* some heuristics behind it (see that function), so be sure to try it
* exactly as-is as well as making variants.
+ *
+ * XXX This comment probably needs updating? The patch significantly
+ * reworks the following code ...
*/
range_pathkeys = select_outer_pathkeys_for_range(root,
@@ -1368,6 +1372,7 @@ sort_inner_and_outer(PlannerInfo *root,
jointype,
extra);
+ /* XXX comments? */
foreach(l, range_pathkeys)
{
PathKey *range_outer_pathkey = (PathKey *) lfirst(l);
@@ -2442,6 +2447,8 @@ select_mergejoin_clauses(PlannerInfo *root,
/*
* range_clause_order
+ *
+ * XXX And what does this do?
*/
static int
range_clause_order(RestrictInfo *first,
@@ -2514,6 +2521,8 @@ range_clause_order(RestrictInfo *first,
/*
* select_rangejoin_clauses
+ *
+ * XXX And what does this do?
*/
static List *
select_rangejoin_clauses(RelOptInfo *outerrel,
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index f82e760658..4d1c539cd1 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -1217,6 +1217,8 @@ initialize_mergeclause_eclasses(PlannerInfo *root, RestrictInfo *restrictinfo)
/*
* initialize_rangeclause_eclasses
+ *
+ * XXX And what does this do?
*/
void
initialize_rangeclause_eclasses(PlannerInfo *root,
@@ -1404,6 +1406,8 @@ find_mergeclauses_for_outer_pathkeys(PlannerInfo *root,
/*
* find_rangeclauses_for_outer_pathkeys
+ *
+ * XXX And what does this do?
*/
List *
find_rangeclauses_for_outer_pathkeys(PlannerInfo *root,
@@ -1637,6 +1641,8 @@ select_outer_pathkeys_for_merge(PlannerInfo *root,
/*
* select_outer_pathkeys_for_range
+ *
+ * XXX And what does this do?
*/
List *
select_outer_pathkeys_for_range(PlannerInfo *root,
@@ -1767,6 +1773,8 @@ make_inner_pathkeys_for_merge(PlannerInfo *root,
/*
* make_inner_pathkey_for_range
+ *
+ * XXX And what does this do?
*/
PathKey *
make_inner_pathkey_for_range(PlannerInfo *root,
diff --git a/src/backend/utils/adt/rangetypes.c b/src/backend/utils/adt/rangetypes.c
index 3972480519..ad3e2933a7 100644
--- a/src/backend/utils/adt/rangetypes.c
+++ b/src/backend/utils/adt/rangetypes.c
@@ -2509,6 +2509,8 @@ range_contains_elem_internal(TypeCacheEntry *typcache, const RangeType *r, Datum
/*
* Test whether range r is right of a specific element value.
+ *
+ * XXX naming seems a bit strange, all other functions here start with range_
*/
bool
elem_before_range_internal(TypeCacheEntry *typcache, Datum val, const RangeType *r)
@@ -2539,6 +2541,8 @@ elem_before_range_internal(TypeCacheEntry *typcache, Datum val, const RangeType
/*
* Test whether range r is left of a specific element value.
+ *
+ * XXX naming seems a bit strange, all other functions here start with range_
*/
bool
elem_after_range_internal(TypeCacheEntry *typcache, Datum val, const RangeType *r)
diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c
index f6cc0cdb8a..136a30ed77 100644
--- a/src/backend/utils/cache/lsyscache.c
+++ b/src/backend/utils/cache/lsyscache.c
@@ -391,6 +391,8 @@ get_mergejoin_opfamilies(Oid opno)
/*
* get_rangejoin_opfamilies
+ *
+ * XXX and what does this do?
*/
List *
get_rangejoin_opfamilies(Oid opno)
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index c7d4431f06..66840821fb 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -2103,8 +2103,8 @@ typedef struct RestrictInfo
/* valid if clause is mergejoinable, else NIL */
List *mergeopfamilies; /* opfamilies containing clause operator */
- List *rangeleftopfamilies;
- List *rangerightopfamilies;
+ List *rangeleftopfamilies; /* comment? */
+ List *rangerightopfamilies; /* comment? */
/* cache space for mergeclause processing; NULL if not yet set */
EquivalenceClass *left_ec; /* EquivalenceClass containing lefthand */
--
2.31.1
From 94f4ea1ffe1551e4d58c2b578e3ae371e28e3883 Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tomas.von...@postgresql.org>
Date: Wed, 17 Nov 2021 20:24:50 +0100
Subject: [PATCH 1/2] 2021/11/17
---
.gitignore | 11 +
src/backend/commands/explain.c | 14 +-
src/backend/executor/nodeMergejoin.c | 209 ++++++++++++++-
src/backend/nodes/copyfuncs.c | 2 +
src/backend/nodes/outfuncs.c | 4 +
src/backend/nodes/readfuncs.c | 1 +
src/backend/optimizer/path/costsize.c | 14 +-
src/backend/optimizer/path/joinpath.c | 313 +++++++++++++++++++++-
src/backend/optimizer/path/pathkeys.c | 173 ++++++++++++
src/backend/optimizer/plan/createplan.c | 10 +
src/backend/optimizer/plan/initsplan.c | 55 ++++
src/backend/optimizer/plan/setrefs.c | 8 +
src/backend/optimizer/util/pathnode.c | 3 +
src/backend/optimizer/util/restrictinfo.c | 2 +
src/backend/utils/adt/rangetypes.c | 60 +++++
src/backend/utils/cache/lsyscache.c | 34 +++
src/include/nodes/execnodes.h | 3 +
src/include/nodes/pathnodes.h | 4 +
src/include/nodes/plannodes.h | 1 +
src/include/optimizer/pathnode.h | 1 +
src/include/optimizer/paths.h | 10 +
src/include/utils/lsyscache.h | 1 +
src/include/utils/rangetypes.h | 2 +
23 files changed, 906 insertions(+), 29 deletions(-)
diff --git a/.gitignore b/.gitignore
index 794e35b73c..5dca1a39ec 100644
--- a/.gitignore
+++ b/.gitignore
@@ -42,3 +42,14 @@ lib*.pc
/Debug/
/Release/
/tmp_install/
+
+# Thomas
+/data/
+/server/
+.idea/
+.settings/
+.cproject
+.project
+logfile
+src/backend/catalog/postgres.*
+*.patch
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 10644dfac4..9b7503630c 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -1206,8 +1206,16 @@ ExplainNode(PlanState *planstate, List *ancestors,
pname = sname = "Nested Loop";
break;
case T_MergeJoin:
- pname = "Merge"; /* "Join" gets added by jointype switch */
- sname = "Merge Join";
+ if(((MergeJoin *) plan)->rangeclause)
+ {
+ pname = "Range Merge"; /* "Join" gets added by jointype switch */
+ sname = "Range Merge Join";
+ }
+ else
+ {
+ pname = "Merge"; /* "Join" gets added by jointype switch */
+ sname = "Merge Join";
+ }
break;
case T_HashJoin:
pname = "Hash"; /* "Join" gets added by jointype switch */
@@ -1948,6 +1956,8 @@ ExplainNode(PlanState *planstate, List *ancestors,
case T_MergeJoin:
show_upper_qual(((MergeJoin *) plan)->mergeclauses,
"Merge Cond", planstate, ancestors, es);
+ show_upper_qual(((MergeJoin *) plan)->rangeclause,
+ "Range Cond", planstate, ancestors, es);
show_upper_qual(((MergeJoin *) plan)->join.joinqual,
"Join Filter", planstate, ancestors, es);
if (((MergeJoin *) plan)->join.joinqual)
diff --git a/src/backend/executor/nodeMergejoin.c b/src/backend/executor/nodeMergejoin.c
index b41454ab6d..09131479b1 100644
--- a/src/backend/executor/nodeMergejoin.c
+++ b/src/backend/executor/nodeMergejoin.c
@@ -93,11 +93,15 @@
#include "postgres.h"
#include "access/nbtree.h"
+#include "catalog/pg_operator.h"
#include "executor/execdebug.h"
#include "executor/nodeMergejoin.h"
#include "miscadmin.h"
+#include "nodes/nodeFuncs.h"
+#include "utils/rangetypes.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
+#include "utils/typcache.h"
/*
@@ -140,6 +144,18 @@ typedef struct MergeJoinClauseData
SortSupportData ssup;
} MergeJoinClauseData;
+/*
+ * Runtime data for the range clause
+ */
+typedef struct RangeJoinData
+{
+ ExprState *startClause;
+ ExprState *endClause;
+ ExprState *rangeExpr;
+ ExprState *elemExpr;
+
+} RangeJoinData;
+
/* Result type for MJEvalOuterValues and MJEvalInnerValues */
typedef enum
{
@@ -269,6 +285,57 @@ MJExamineQuals(List *mergeclauses,
return clauses;
}
+/*
+ * MJCreateRangeData
+ */
+static RangeData
+MJCreateRangeData(List *rangeclause,
+ PlanState *parent)
+{
+ RangeData data;
+
+ Assert(list_length(node->rangeclause) < 3);
+
+ data = (RangeData) palloc0(sizeof(RangeJoinData));
+
+ data->startClause = NULL;
+ data->endClause = NULL;
+ data->rangeExpr = NULL;
+ data->elemExpr = NULL;
+
+ if(list_length(rangeclause) == 2)
+ {
+ data->startClause = ExecInitExpr(linitial(rangeclause), parent);
+ data->endClause = ExecInitExpr(lsecond(rangeclause), parent);
+ }
+ else
+ {
+ OpExpr *qual = (OpExpr *) linitial(rangeclause);
+ ExprState *lexpr;
+ ExprState *rexpr;
+
+ /*
+ * Prepare the input expressions for execution.
+ */
+ lexpr = ExecInitExpr((Expr *) get_leftop(qual), parent);
+ rexpr = ExecInitExpr((Expr *) get_rightop(qual), parent);
+
+ if(qual->opno == OID_RANGE_CONTAINS_ELEM_OP)
+ {
+ data->rangeExpr = lexpr;
+ data->elemExpr = rexpr;
+ }
+ else
+ {
+ Assert(qual->opno == OID_RANGE_ELEM_CONTAINED_OP);
+ data->rangeExpr = rexpr;
+ data->elemExpr = lexpr;
+ }
+ }
+
+ return data;
+}
+
/*
* MJEvalOuterValues
*
@@ -445,6 +512,73 @@ MJCompare(MergeJoinState *mergestate)
}
+/*
+ * MJCompareRange
+ *
+ * Compare the rangejoinable values of the current two input tuples
+ * and return 0 if they are equal (ie, the outer interval contains the inner),
+ * >0 if outer > inner, <0 if outer < inner.
+ */
+static int
+MJCompareRange(MergeJoinState *mergestate)
+{
+ int result = 0;
+ bool isNull;
+ MemoryContext oldContext;
+ RangeData rangeData = mergestate->mj_RangeData;
+ ExprContext *econtext = mergestate->js.ps.ps_ExprContext;
+ ExprState *endClause = rangeData->endClause,
+ *startClause = rangeData->startClause,
+ *rangeExpr = rangeData->rangeExpr,
+ *elemExpr = rangeData->elemExpr;
+
+ /*
+ * Call the comparison functions in short-lived context, in case they leak
+ * memory.
+ */
+ ResetExprContext(econtext);
+
+ oldContext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory);
+
+ econtext->ecxt_outertuple = mergestate->mj_OuterTupleSlot;
+ econtext->ecxt_innertuple = mergestate->mj_InnerTupleSlot;
+
+ if (endClause != NULL)
+ {
+ Assert(startClause != NULL);
+
+ if (!ExecEvalExprSwitchContext(endClause, econtext, &isNull))
+ result = -1;
+ else if (!ExecEvalExprSwitchContext(startClause, econtext, &isNull))
+ result = 1;
+ }
+ else
+ {
+ Datum rangeDatum,
+ elemDatum;
+ Oid rangeType;
+ TypeCacheEntry *typecache;
+
+ Assert(rangeExpr != NULL && elemExpr != NULL);
+
+ rangeDatum = ExecEvalExprSwitchContext(rangeExpr, econtext, &isNull);
+ elemDatum = ExecEvalExprSwitchContext(elemExpr, econtext, &isNull);
+
+ rangeType = exprType((Node *) rangeExpr->expr);
+ typecache = lookup_type_cache(rangeType, TYPECACHE_RANGE_INFO);
+
+ if (elem_after_range_internal(typecache, elemDatum, DatumGetRangeTypeP(rangeDatum)))
+ result = -1;
+ else if (elem_before_range_internal(typecache, elemDatum, DatumGetRangeTypeP(rangeDatum)))
+ result = 1;
+ }
+
+ MemoryContextSwitchTo(oldContext);
+
+ return result;
+}
+
+
/*
* Generate a fake join tuple with nulls for the inner tuple,
* and return it if it passes the non-join quals.
@@ -604,6 +738,7 @@ ExecMergeJoin(PlanState *pstate)
ExprState *otherqual;
bool qualResult;
int compareResult;
+ int compareRangeResult;
PlanState *innerPlan;
TupleTableSlot *innerTupleSlot;
PlanState *outerPlan;
@@ -611,6 +746,7 @@ ExecMergeJoin(PlanState *pstate)
ExprContext *econtext;
bool doFillOuter;
bool doFillInner;
+ bool isRangeJoin;
CHECK_FOR_INTERRUPTS();
@@ -624,6 +760,7 @@ ExecMergeJoin(PlanState *pstate)
otherqual = node->js.ps.qual;
doFillOuter = node->mj_FillOuter;
doFillInner = node->mj_FillInner;
+ isRangeJoin = node->mj_RangeJoin;
/*
* Reset per-tuple memory context to free any expression evaluation
@@ -891,8 +1028,23 @@ ExecMergeJoin(PlanState *pstate)
compareResult = MJCompare(node);
MJ_DEBUG_COMPARE(compareResult);
- if (compareResult == 0)
- node->mj_JoinState = EXEC_MJ_JOINTUPLES;
+ if(compareResult == 0)
+ {
+ if(isRangeJoin)
+ {
+ compareRangeResult = MJCompareRange(node);
+
+ if (compareRangeResult == 0)
+ node->mj_JoinState = EXEC_MJ_JOINTUPLES;
+ else
+ {
+ Assert(compareRangeResult < 0);
+ node->mj_JoinState = EXEC_MJ_NEXTOUTER;
+ }
+ }
+ else
+ node->mj_JoinState = EXEC_MJ_JOINTUPLES;
+ }
else
{
Assert(compareResult < 0);
@@ -1085,7 +1237,19 @@ ExecMergeJoin(PlanState *pstate)
/* we need not do MJEvalInnerValues again */
}
- node->mj_JoinState = EXEC_MJ_JOINTUPLES;
+ if(isRangeJoin)
+ {
+ compareRangeResult = MJCompareRange(node);
+
+ if(compareRangeResult == 0)
+ node->mj_JoinState = EXEC_MJ_JOINTUPLES;
+ else if(compareRangeResult < 0)
+ node->mj_JoinState = EXEC_MJ_SKIPOUTER_ADVANCE;
+ else /* compareRangeResult > 0 */
+ node->mj_JoinState = EXEC_MJ_SKIPINNER_ADVANCE;
+ }
+ else
+ node->mj_JoinState = EXEC_MJ_JOINTUPLES;
}
else
{
@@ -1184,12 +1348,28 @@ ExecMergeJoin(PlanState *pstate)
if (compareResult == 0)
{
- if (!node->mj_SkipMarkRestore)
- ExecMarkPos(innerPlan);
-
- MarkInnerTuple(node->mj_InnerTupleSlot, node);
-
- node->mj_JoinState = EXEC_MJ_JOINTUPLES;
+ if(isRangeJoin)
+ {
+ compareRangeResult = MJCompareRange(node);
+ if(compareRangeResult == 0)
+ {
+ if (!node->mj_SkipMarkRestore)
+ ExecMarkPos(innerPlan);
+ MarkInnerTuple(node->mj_InnerTupleSlot, node);
+ node->mj_JoinState = EXEC_MJ_JOINTUPLES;
+ }
+ else if(compareRangeResult < 0)
+ node->mj_JoinState = EXEC_MJ_SKIPOUTER_ADVANCE;
+ else /* compareRangeResult > 0 */
+ node->mj_JoinState = EXEC_MJ_SKIPINNER_ADVANCE;
+ }
+ else
+ {
+ if (!node->mj_SkipMarkRestore)
+ ExecMarkPos(innerPlan);
+ MarkInnerTuple(node->mj_InnerTupleSlot, node);
+ node->mj_JoinState = EXEC_MJ_JOINTUPLES;
+ }
}
else if (compareResult < 0)
node->mj_JoinState = EXEC_MJ_SKIPOUTER_ADVANCE;
@@ -1532,6 +1712,17 @@ ExecInitMergeJoin(MergeJoin *node, EState *estate, int eflags)
ExecInitQual(node->join.joinqual, (PlanState *) mergestate);
/* mergeclauses are handled below */
+ /*
+ * initialize range join
+ */
+ if(node->rangeclause)
+ {
+ mergestate->mj_RangeData = MJCreateRangeData(node->rangeclause, (PlanState *) mergestate);
+ mergestate->mj_RangeJoin = true;
+ }
+ else
+ mergestate->mj_RangeJoin = false;
+
/*
* detect whether we need only consider the first matching inner tuple
*/
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index ad1ea2ff2f..262676b1fd 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -2349,6 +2349,8 @@ _copyRestrictInfo(const RestrictInfo *from)
COPY_SCALAR_FIELD(norm_selec);
COPY_SCALAR_FIELD(outer_selec);
COPY_NODE_FIELD(mergeopfamilies);
+ COPY_NODE_FIELD(rangeleftopfamilies);
+ COPY_NODE_FIELD(rangerightopfamilies);
/* EquivalenceClasses are never copied, so shallow-copy the pointers */
COPY_SCALAR_FIELD(left_ec);
COPY_SCALAR_FIELD(right_ec);
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index 23f23f11dc..1361c86d08 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -765,6 +765,7 @@ _outMergeJoin(StringInfo str, const MergeJoin *node)
WRITE_BOOL_FIELD(skip_mark_restore);
WRITE_NODE_FIELD(mergeclauses);
+ WRITE_NODE_FIELD(rangeclause);
numCols = list_length(node->mergeclauses);
@@ -2243,6 +2244,7 @@ _outMergePath(StringInfo str, const MergePath *node)
_outJoinPathInfo(str, (const JoinPath *) node);
WRITE_NODE_FIELD(path_mergeclauses);
+ WRITE_NODE_FIELD(path_rangeclause);
WRITE_NODE_FIELD(outersortkeys);
WRITE_NODE_FIELD(innersortkeys);
WRITE_BOOL_FIELD(skip_mark_restore);
@@ -2559,6 +2561,8 @@ _outRestrictInfo(StringInfo str, const RestrictInfo *node)
WRITE_FLOAT_FIELD(norm_selec, "%.4f");
WRITE_FLOAT_FIELD(outer_selec, "%.4f");
WRITE_NODE_FIELD(mergeopfamilies);
+ WRITE_NODE_FIELD(rangeleftopfamilies);
+ WRITE_NODE_FIELD(rangerightopfamilies);
/* don't write left_ec, leads to infinite recursion in plan tree dump */
/* don't write right_ec, leads to infinite recursion in plan tree dump */
WRITE_NODE_FIELD(left_em);
diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c
index abf08b7a2f..1e71f26eb7 100644
--- a/src/backend/nodes/readfuncs.c
+++ b/src/backend/nodes/readfuncs.c
@@ -2173,6 +2173,7 @@ _readMergeJoin(void)
READ_BOOL_FIELD(skip_mark_restore);
READ_NODE_FIELD(mergeclauses);
+ READ_NODE_FIELD(rangeclause);
numCols = list_length(local_node->mergeclauses);
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 1e4d404f02..041331d6bf 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -3419,6 +3419,7 @@ final_cost_mergejoin(PlannerInfo *root, MergePath *path,
double inner_path_rows = inner_path->rows;
List *mergeclauses = path->path_mergeclauses;
List *innersortkeys = path->innersortkeys;
+ List *allclauses;
Cost startup_cost = workspace->startup_cost;
Cost run_cost = workspace->run_cost;
Cost inner_run_cost = workspace->inner_run_cost;
@@ -3435,9 +3436,12 @@ final_cost_mergejoin(PlannerInfo *root, MergePath *path,
rescannedtuples;
double rescanratio;
- /* Protect some assumptions below that rowcounts aren't zero */
- if (inner_path_rows <= 0)
- inner_path_rows = 1;
+ allclauses = list_concat(list_copy(mergeclauses), path->path_rangeclause);
+
+
+ /* Protect some assumptions below that rowcounts aren't zero */
+ if (inner_path_rows <= 0)
+ inner_path_rows = 1;
/* Mark the path with the correct row estimate */
if (path->jpath.path.param_info)
@@ -3466,7 +3470,7 @@ final_cost_mergejoin(PlannerInfo *root, MergePath *path,
* Compute cost of the mergequals and qpquals (other restriction clauses)
* separately.
*/
- cost_qual_eval(&merge_qual_cost, mergeclauses, root);
+ cost_qual_eval(&merge_qual_cost, allclauses, root);
cost_qual_eval(&qp_qual_cost, path->jpath.joinrestrictinfo, root);
qp_qual_cost.startup -= merge_qual_cost.startup;
qp_qual_cost.per_tuple -= merge_qual_cost.per_tuple;
@@ -3490,7 +3494,7 @@ final_cost_mergejoin(PlannerInfo *root, MergePath *path,
* Get approx # tuples passing the mergequals. We use approx_tuple_count
* here because we need an estimate done with JOIN_INNER semantics.
*/
- mergejointuples = approx_tuple_count(root, &path->jpath, mergeclauses);
+ mergejointuples = approx_tuple_count(root, &path->jpath, allclauses);
/*
* When there are equal merge keys in the outer relation, the mergejoin
diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c
index 0f3ad8aa65..4777e0a850 100644
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -16,6 +16,7 @@
#include <math.h>
+#include "catalog/pg_operator.h"
#include "executor/executor.h"
#include "foreign/fdwapi.h"
#include "nodes/nodeFuncs.h"
@@ -24,6 +25,7 @@
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
#include "optimizer/planmain.h"
+#include "utils/lsyscache.h"
#include "utils/typcache.h"
/* Hook for plugins to get control in add_paths_to_joinrel() */
@@ -84,6 +86,9 @@ static List *select_mergejoin_clauses(PlannerInfo *root,
List *restrictlist,
JoinType jointype,
bool *mergejoin_allowed);
+static List *select_rangejoin_clauses(RelOptInfo *outerrel,
+ RelOptInfo *innerrel,
+ List *restrictlist);
static void generate_mergejoin_paths(PlannerInfo *root,
RelOptInfo *joinrel,
RelOptInfo *innerrel,
@@ -147,6 +152,7 @@ add_paths_to_joinrel(PlannerInfo *root,
extra.restrictlist = restrictlist;
extra.mergeclause_list = NIL;
+ extra.rangeclause_list = NIL;
extra.sjinfo = sjinfo;
extra.param_source_rels = NULL;
@@ -207,6 +213,7 @@ add_paths_to_joinrel(PlannerInfo *root,
* it's a full join.
*/
if (enable_mergejoin || jointype == JOIN_FULL)
+ {
extra.mergeclause_list = select_mergejoin_clauses(root,
joinrel,
outerrel,
@@ -215,6 +222,12 @@ add_paths_to_joinrel(PlannerInfo *root,
jointype,
&mergejoin_allowed);
+ if (jointype == JOIN_INNER || jointype == JOIN_LEFT)
+ extra.rangeclause_list = select_rangejoin_clauses(outerrel,
+ innerrel,
+ restrictlist);
+ }
+
/*
* If it's SEMI, ANTI, or inner_unique join, compute correction factors
* for cost estimation. These will be the same for all paths.
@@ -792,6 +805,7 @@ try_mergejoin_path(PlannerInfo *root,
Path *inner_path,
List *pathkeys,
List *mergeclauses,
+ List *rangeclause,
List *outersortkeys,
List *innersortkeys,
JoinType jointype,
@@ -865,6 +879,7 @@ try_mergejoin_path(PlannerInfo *root,
pathkeys,
required_outer,
mergeclauses,
+ rangeclause,
outersortkeys,
innersortkeys));
}
@@ -941,6 +956,7 @@ try_partial_mergejoin_path(PlannerInfo *root,
pathkeys,
NULL,
mergeclauses,
+ NIL,
outersortkeys,
innersortkeys));
}
@@ -1122,7 +1138,8 @@ sort_inner_and_outer(PlannerInfo *root,
Path *inner_path;
Path *cheapest_partial_outer = NULL;
Path *cheapest_safe_inner = NULL;
- List *all_pathkeys;
+ List *merge_pathkeys;
+ List *range_pathkeys;
ListCell *l;
/*
@@ -1222,25 +1239,80 @@ sort_inner_and_outer(PlannerInfo *root,
* some heuristics behind it (see that function), so be sure to try it
* exactly as-is as well as making variants.
*/
- all_pathkeys = select_outer_pathkeys_for_merge(root,
- extra->mergeclause_list,
- joinrel);
- foreach(l, all_pathkeys)
+ range_pathkeys = select_outer_pathkeys_for_range(root,
+ extra->rangeclause_list);
+
+ merge_pathkeys = select_outer_pathkeys_for_merge(root,
+ extra->mergeclause_list,
+ joinrel);
+
+ if(merge_pathkeys == NIL && enable_mergejoin)
+ {
+ foreach(l, range_pathkeys)
+ {
+ PathKey *range_pathkey = (PathKey *) lfirst(l);
+ List *outerkeys = list_make1(range_pathkey);
+ List *innerkeys = NIL;
+ List *rangeclauses;
+ ListCell *lc;
+
+ rangeclauses = find_rangeclauses_for_outer_pathkeys(root,
+ outerkeys,
+ NIL,
+ extra->rangeclause_list);
+
+ foreach(lc, rangeclauses)
+ {
+ List *rangeclause = (List *) lfirst(lc);
+ PathKey *range_inner_pathkey =
+ make_inner_pathkey_for_range(root,
+ rangeclause);
+
+ innerkeys = lappend(innerkeys, range_inner_pathkey);
+
+ merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
+ outerkeys);
+
+ /*
+ * And now we can make the path.
+ *
+ * Note: it's possible that the cheapest paths will already be sorted
+ * properly. try_mergejoin_path will detect that case and suppress an
+ * explicit sort step, so we needn't do so here.
+ */
+ try_mergejoin_path(root,
+ joinrel,
+ outer_path,
+ inner_path,
+ merge_pathkeys,
+ NIL,
+ rangeclause,
+ outerkeys,
+ innerkeys,
+ jointype,
+ extra,
+ false);
+ }
+ }
+ }
+
+
+ foreach(l, merge_pathkeys)
{
List *front_pathkey = (List *) lfirst(l);
List *cur_mergeclauses;
List *outerkeys;
List *innerkeys;
- List *merge_pathkeys;
+ List *mergejoin_pathkeys;
/* Make a pathkey list with this guy first */
- if (l != list_head(all_pathkeys))
+ if (l != list_head(merge_pathkeys))
outerkeys = lcons(front_pathkey,
- list_delete_nth_cell(list_copy(all_pathkeys),
- foreach_current_index(l)));
+ list_delete_ptr(list_copy(merge_pathkeys),
+ front_pathkey));
else
- outerkeys = all_pathkeys; /* no work at first one... */
+ outerkeys = merge_pathkeys; /* no work at first one... */
/* Sort the mergeclauses into the corresponding ordering */
cur_mergeclauses =
@@ -1257,7 +1329,7 @@ sort_inner_and_outer(PlannerInfo *root,
outerkeys);
/* Build pathkeys representing output sort order */
- merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
+ mergejoin_pathkeys = build_join_pathkeys(root, joinrel, jointype,
outerkeys);
/*
@@ -1271,8 +1343,9 @@ sort_inner_and_outer(PlannerInfo *root,
joinrel,
outer_path,
inner_path,
- merge_pathkeys,
+ mergejoin_pathkeys,
cur_mergeclauses,
+ NIL,
outerkeys,
innerkeys,
jointype,
@@ -1288,12 +1361,88 @@ sort_inner_and_outer(PlannerInfo *root,
joinrel,
cheapest_partial_outer,
cheapest_safe_inner,
- merge_pathkeys,
+ mergejoin_pathkeys,
cur_mergeclauses,
outerkeys,
innerkeys,
jointype,
extra);
+
+ foreach(l, range_pathkeys)
+ {
+ PathKey *range_outer_pathkey = (PathKey *) lfirst(l);
+ List *range_outerkeys = list_copy(outerkeys);
+ List *range_innerkeys;
+ List *rangeclauses;
+ ListCell *lc;
+
+ if(list_member_ptr(range_outerkeys, range_outer_pathkey))
+ {
+ if(!equal(llast(range_outerkeys), range_outer_pathkey))
+ continue;
+ }
+ else
+ range_outerkeys = lappend(range_outerkeys, range_outer_pathkey);
+
+ /* Sort the mergeclauses into the corresponding ordering */
+ cur_mergeclauses =
+ find_mergeclauses_for_outer_pathkeys(root,
+ range_outerkeys,
+ extra->mergeclause_list);
+
+ /* Should have used them all... */
+ Assert(list_length(cur_mergeclauses) == list_length(extra->mergeclause_list));
+
+ /* Build sort pathkeys for the inner side */
+ range_innerkeys = make_inner_pathkeys_for_merge(root,
+ cur_mergeclauses,
+ range_outerkeys);
+
+ rangeclauses = find_rangeclauses_for_outer_pathkeys(root,
+ range_outerkeys,
+ cur_mergeclauses,
+ extra->rangeclause_list);
+
+ foreach(lc, rangeclauses)
+ {
+ List *cur_rangeclause = (List *) lfirst(lc);
+ PathKey *range_inner_pathkey;
+
+ range_inner_pathkey = make_inner_pathkey_for_range(root, cur_rangeclause);
+
+ if(list_member_ptr(range_innerkeys, range_inner_pathkey))
+ {
+ if(!equal(llast(range_innerkeys), range_inner_pathkey))
+ continue;
+ }
+ else
+ range_innerkeys = lappend(range_innerkeys, range_inner_pathkey);
+
+
+ mergejoin_pathkeys = build_join_pathkeys(root, joinrel, jointype,
+ range_outerkeys);
+
+ /*
+ * And now we can make the path.
+ *
+ * Note: it's possible that the cheapest paths will already be sorted
+ * properly. try_mergejoin_path will detect that case and suppress an
+ * explicit sort step, so we needn't do so here.
+ */
+ try_mergejoin_path(root,
+ joinrel,
+ outer_path,
+ inner_path,
+ mergejoin_pathkeys,
+ cur_mergeclauses,
+ cur_rangeclause,
+ range_outerkeys,
+ range_innerkeys,
+ jointype,
+ extra,
+ false);
+ }
+ }
}
}
@@ -1379,6 +1528,7 @@ generate_mergejoin_paths(PlannerInfo *root,
merge_pathkeys,
mergeclauses,
NIL,
+ NIL,
innersortkeys,
jointype,
extra,
@@ -1477,6 +1627,7 @@ generate_mergejoin_paths(PlannerInfo *root,
newclauses,
NIL,
NIL,
+ NIL,
jointype,
extra,
is_partial);
@@ -1521,6 +1672,7 @@ generate_mergejoin_paths(PlannerInfo *root,
newclauses,
NIL,
NIL,
+ NIL,
jointype,
extra,
is_partial);
@@ -2287,3 +2439,138 @@ select_mergejoin_clauses(PlannerInfo *root,
return result_list;
}
+
+/*
+ * range_clause_order
+ */
+static int
+range_clause_order(RestrictInfo *first,
+ RestrictInfo *second)
+{
+ /*
+ * Extract details from first restrictinfo
+ */
+ Node *first_left = get_leftop(first->clause),
+ *first_right = get_rightop(first->clause);
+ bool first_outer_is_left = first->outer_is_left;
+ int first_strategy = get_op_opfamily_strategy(((OpExpr *) first->clause)->opno,
+ (linitial_oid(first->rangeleftopfamilies)));
+ bool first_less = (first_strategy == BTLessStrategyNumber ||
+ first_strategy == BTLessEqualStrategyNumber);
+
+ /*
+ * Extract details from second restrictinfo
+ */
+ Node *second_left = get_leftop(second->clause),
+ *second_right = get_rightop(second->clause);
+ bool second_outer_is_left = second->outer_is_left;
+ int second_strategy = get_op_opfamily_strategy(((OpExpr *) second->clause)->opno,
+ (linitial_oid(second->rangeleftopfamilies)));
+ bool second_less = (second_strategy == BTLessStrategyNumber ||
+ second_strategy == BTLessEqualStrategyNumber);
+
+ /*
+ * Check for rangeclause
+ */
+ if (first_less && second_less)
+ {
+ if (!first_outer_is_left && second_outer_is_left &&
+ equal(first_left, second_right))
+ return 2;
+ else if (first_outer_is_left && !second_outer_is_left &&
+ equal(first_right, second_left))
+ return 1;
+ }
+ else if (!first_less && !second_less)
+ {
+ if (!first_outer_is_left && second_outer_is_left &&
+ equal(first_left, second_right))
+ return 1;
+ else if (first_outer_is_left && !second_outer_is_left &&
+ equal(first_right, second_left))
+ return 2;
+ }
+ else if (first_less && !second_less)
+ {
+ if (!first_outer_is_left && !second_outer_is_left &&
+ equal(first_left, second_left))
+ return 2;
+ else if (first_outer_is_left && second_outer_is_left &&
+ equal(first_right, second_right))
+ return 1;
+ }
+ else if (!first_less && second_less)
+ {
+ if (!first_outer_is_left && !second_outer_is_left &&
+ equal(first_left, second_left))
+ return 1;
+ else if (first_outer_is_left && second_outer_is_left &&
+ equal(first_right, second_right))
+ return 2;
+ }
+
+ return 0;
+}
+
+/*
+ * select_rangejoin_clauses
+ */
+static List *
+select_rangejoin_clauses(RelOptInfo *outerrel,
+ RelOptInfo *innerrel,
+ List *restrictlist)
+{
+ List *result_list = NIL;
+ ListCell *l;
+ List *range_candidates = NIL;
+
+ foreach(l, restrictlist)
+ {
+ RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
+ OpExpr *clause = (OpExpr *) restrictinfo->clause;
+
+ /* Check that clause is a rangejoinable operator clause */
+ if (restrictinfo->rangeleftopfamilies == NIL)
+ continue; /* not rangejoinable */
+
+ /*
+ * Check if clause has the form "outer op inner" or "inner op outer".
+ */
+ if (!clause_sides_match_join(restrictinfo, outerrel, innerrel))
+ continue; /* no good for these input relations */
+
+ if (restrictinfo->rangeleftopfamilies == restrictinfo->rangerightopfamilies)
+ {
+ ListCell *lc;
+ List *range_clause;
+
+ foreach(lc, range_candidates)
+ {
+ RestrictInfo *candidate = (RestrictInfo *) lfirst(lc);
+
+ switch (range_clause_order(restrictinfo, candidate))
+ {
+ case 1:
+ range_clause = list_make2(restrictinfo, candidate);
+ result_list = lappend(result_list, range_clause);
+ break;
+
+ case 2:
+ range_clause = list_make2(candidate, restrictinfo);
+ result_list = lappend(result_list, range_clause);
+ break;
+ default:
+ break;
+ }
+ }
+ range_candidates = lappend(range_candidates, restrictinfo);
+ }
+ else if ((clause->opno == OID_RANGE_CONTAINS_ELEM_OP && restrictinfo->outer_is_left) ||
+ (clause->opno == OID_RANGE_ELEM_CONTAINED_OP && !restrictinfo->outer_is_left))
+ {
+ result_list = lappend(result_list, list_make1(restrictinfo));
+ }
+ }
+ list_free(range_candidates);
+ return result_list;
+}
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 216dd26385..f82e760658 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -19,6 +19,7 @@
#include "access/stratnum.h"
#include "catalog/pg_opfamily.h"
+#include "catalog/pg_operator.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#include "nodes/plannodes.h"
@@ -1214,6 +1215,60 @@ initialize_mergeclause_eclasses(PlannerInfo *root, RestrictInfo *restrictinfo)
true);
}
+/*
+ * initialize_rangeclause_eclasses
+ */
+void
+initialize_rangeclause_eclasses(PlannerInfo *root,
+ RestrictInfo *restrictinfo)
+{
+ Expr *clause = restrictinfo->clause;
+ Oid lefttype,
+ righttype,
+ opno;
+
+ /* Should be a rangeclause ... */
+ Assert(restrictinfo->rangeleftopfamilies != NIL &&
+ restrictinfo->rangerightopfamilies != NIL);
+ /* ... with links not yet set */
+ Assert(restrictinfo->left_ec == NULL);
+ Assert(restrictinfo->right_ec == NULL);
+
+ opno = ((OpExpr *) clause)->opno;
+
+ /* Need the declared input types of the operator */
+ op_input_types(opno, &lefttype, &righttype);
+
+ if(opno == OID_RANGE_CONTAINS_ELEM_OP)
+ righttype = exprType(get_rightop(clause));
+
+ else if(opno == OID_RANGE_ELEM_CONTAINED_OP)
+ lefttype = exprType(get_leftop(clause));
+
+
+ /* Find or create a matching EquivalenceClass for each side */
+ restrictinfo->left_ec =
+ get_eclass_for_sort_expr(root,
+ (Expr *) get_leftop(clause),
+ restrictinfo->nullable_relids,
+ restrictinfo->rangeleftopfamilies,
+ lefttype,
+ ((OpExpr *) clause)->inputcollid,
+ 0,
+ NULL,
+ true);
+ restrictinfo->right_ec =
+ get_eclass_for_sort_expr(root,
+ (Expr *) get_rightop(clause),
+ restrictinfo->nullable_relids,
+ restrictinfo->rangerightopfamilies,
+ righttype,
+ ((OpExpr *) clause)->inputcollid,
+ 0,
+ NULL,
+ true);
+}
+
/*
* update_mergeclause_eclasses
* Make the cached EquivalenceClass links valid in a mergeclause
@@ -1347,6 +1402,64 @@ find_mergeclauses_for_outer_pathkeys(PlannerInfo *root,
return mergeclauses;
}
+/*
+ * find_rangeclauses_for_outer_pathkeys
+ */
+List *
+find_rangeclauses_for_outer_pathkeys(PlannerInfo *root,
+ List *pathkeys,
+ List *mergeclauses,
+ List *rangeclauses)
+{
+ ListCell *i;
+ RestrictInfo *mergeclause = NULL;
+ List *result_list = NIL;
+
+ if(mergeclauses)
+ mergeclause = llast(mergeclauses);
+
+ foreach(i, pathkeys)
+ {
+ PathKey *pathkey = (PathKey *) lfirst(i);
+ EquivalenceClass *pathkey_ec = pathkey->pk_eclass;
+ ListCell *j;
+
+ if(mergeclause != NULL)
+ {
+ if(mergeclause->outer_is_left)
+ {
+ if(mergeclause->left_ec != pathkey_ec)
+ continue;
+ }
+ else if(mergeclause->right_ec != pathkey_ec)
+ continue;
+ }
+
+ foreach(j, rangeclauses)
+ {
+ List *rangeclause = (List *) lfirst(j);
+ RestrictInfo *rinfo = (RestrictInfo *) linitial(rangeclause);
+ EquivalenceClass *clause_ec;
+
+ clause_ec = rinfo->outer_is_left ?
+ rinfo->left_ec : rinfo->right_ec;
+
+ if (clause_ec == pathkey_ec)
+ result_list = lappend(result_list, rangeclause);
+ }
+
+ /*
+ * Was it the last possible pathkey?
+ */
+ if(mergeclause == NULL)
+ break;
+ else
+ mergeclause = NULL;
+
+ }
+ return result_list;
+}
+
/*
* select_outer_pathkeys_for_merge
* Builds a pathkey list representing a possible sort ordering
@@ -1522,6 +1635,37 @@ select_outer_pathkeys_for_merge(PlannerInfo *root,
return pathkeys;
}
+/*
+ * select_outer_pathkeys_for_range
+ */
+List *
+select_outer_pathkeys_for_range(PlannerInfo *root,
+ List *rangeclauses)
+{
+ EquivalenceClass *ec;
+ PathKey *pathkey;
+ List *pathkeys = NIL;
+ ListCell *lc;
+
+ foreach(lc, rangeclauses){
+ List *rangeclause = lfirst(lc);
+ RestrictInfo *rinfo = linitial(rangeclause);
+
+ if (rinfo->outer_is_left)
+ ec = rinfo->left_ec;
+ else
+ ec = rinfo->right_ec;
+
+ pathkey = make_canonical_pathkey(root,
+ ec,
+ linitial_oid(ec->ec_opfamilies),
+ BTLessStrategyNumber,
+ false);
+ pathkeys = lappend(pathkeys, pathkey);
+ }
+ return pathkeys;
+}
+
/*
* make_inner_pathkeys_for_merge
* Builds a pathkey list representing the explicit sort order that
@@ -1621,6 +1765,35 @@ make_inner_pathkeys_for_merge(PlannerInfo *root,
return pathkeys;
}
+/*
+ * make_inner_pathkey_for_range
+ */
+PathKey *
+make_inner_pathkey_for_range(PlannerInfo *root,
+ List *rangeclause)
+{
+ EquivalenceClass *ec;
+ PathKey *pathkey;
+ RestrictInfo *rinfo;
+
+ if(rangeclause == NIL)
+ return NULL;
+
+ rinfo = linitial(rangeclause);
+
+ if (rinfo->outer_is_left)
+ ec = rinfo->right_ec;
+ else
+ ec = rinfo->left_ec;
+
+ pathkey = make_canonical_pathkey(root,
+ ec,
+ linitial_oid(ec->ec_opfamilies),
+ BTLessStrategyNumber,
+ false);
+ return pathkey;
+}
+
/*
* trim_mergeclauses_for_inner_pathkeys
* This routine trims a list of mergeclauses to include just those that
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 3dc0176a51..49cc8d16d6 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -246,6 +246,7 @@ static Hash *make_hash(Plan *lefttree,
static MergeJoin *make_mergejoin(List *tlist,
List *joinclauses, List *otherclauses,
List *mergeclauses,
+ List *rangeclause,
Oid *mergefamilies,
Oid *mergecollations,
int *mergestrategies,
@@ -4301,6 +4302,7 @@ create_mergejoin_plan(PlannerInfo *root,
List *joinclauses;
List *otherclauses;
List *mergeclauses;
+ List *rangeclause;
List *outerpathkeys;
List *innerpathkeys;
int nClauses;
@@ -4355,6 +4357,9 @@ create_mergejoin_plan(PlannerInfo *root,
mergeclauses = get_actual_clauses(best_path->path_mergeclauses);
joinclauses = list_difference(joinclauses, mergeclauses);
+ rangeclause = get_actual_clauses(best_path->path_rangeclause);
+ joinclauses = list_difference(joinclauses, rangeclause);
+
/*
* Replace any outer-relation variables with nestloop params. There
* should not be any in the mergeclauses.
@@ -4365,6 +4370,8 @@ create_mergejoin_plan(PlannerInfo *root,
replace_nestloop_params(root, (Node *) joinclauses);
otherclauses = (List *)
replace_nestloop_params(root, (Node *) otherclauses);
+ rangeclause = (List *)
+ replace_nestloop_params(root, (Node *) rangeclause);
}
/*
@@ -4581,6 +4588,7 @@ create_mergejoin_plan(PlannerInfo *root,
joinclauses,
otherclauses,
mergeclauses,
+ rangeclause,
mergefamilies,
mergecollations,
mergestrategies,
@@ -5883,6 +5891,7 @@ make_mergejoin(List *tlist,
List *joinclauses,
List *otherclauses,
List *mergeclauses,
+ List *rangeclause,
Oid *mergefamilies,
Oid *mergecollations,
int *mergestrategies,
@@ -5909,6 +5918,7 @@ make_mergejoin(List *tlist,
node->join.jointype = jointype;
node->join.inner_unique = inner_unique;
node->join.joinqual = joinclauses;
+ node->rangeclause = rangeclause;
return node;
}
diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c
index f6a202d900..10990394ec 100644
--- a/src/backend/optimizer/plan/initsplan.c
+++ b/src/backend/optimizer/plan/initsplan.c
@@ -16,6 +16,7 @@
#include "catalog/pg_class.h"
#include "catalog/pg_type.h"
+#include "catalog/pg_operator.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#include "optimizer/clauses.h"
@@ -77,6 +78,7 @@ static bool check_equivalence_delay(PlannerInfo *root,
RestrictInfo *restrictinfo);
static bool check_redundant_nullability_qual(PlannerInfo *root, Node *clause);
static void check_mergejoinable(RestrictInfo *restrictinfo);
+static void check_rangejoinable(RestrictInfo *restrictinfo);
static void check_hashjoinable(RestrictInfo *restrictinfo);
static void check_memoizable(RestrictInfo *restrictinfo);
@@ -1877,6 +1879,8 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause,
*/
check_mergejoinable(restrictinfo);
+ check_rangejoinable(restrictinfo);
+
/*
* If it is a true equivalence clause, send it to the EquivalenceClass
* machinery. We do *not* attach it directly to any restriction or join
@@ -1962,6 +1966,13 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause,
}
}
+ if(restrictinfo->rangeleftopfamilies)
+ {
+ Assert(restrictinfo->rangerightopfamilies);
+
+ initialize_rangeclause_eclasses(root, restrictinfo);
+ }
+
/* No EC special case applies, so push it into the clause lists */
distribute_restrictinfo_to_rels(root, restrictinfo);
}
@@ -2677,6 +2688,50 @@ check_mergejoinable(RestrictInfo *restrictinfo)
*/
}
+/*
+ * check_rangejoinable
+ */
+static void
+check_rangejoinable(RestrictInfo *restrictinfo)
+{
+ OpExpr *clause = (OpExpr *) restrictinfo->clause;
+ Oid opno = clause->opno;
+
+ if (!restrictinfo->can_join)
+ return;
+ if (contain_volatile_functions((Node *) clause))
+ return;
+
+ if (opno == OID_RANGE_CONTAINS_ELEM_OP ||
+ opno == OID_RANGE_ELEM_CONTAINED_OP)
+ {
+ Node *leftarg = get_leftop(clause),
+ *rightarg = get_rightop(clause);
+
+ Oid lefttype = exprType(leftarg),
+ righttype = exprType(rightarg);
+
+ TypeCacheEntry *left_typecache = lookup_type_cache(lefttype, TYPECACHE_CMP_PROC),
+ *right_typecache = lookup_type_cache(righttype, TYPECACHE_CMP_PROC);
+
+ restrictinfo->rangeleftopfamilies =
+ lappend_oid(restrictinfo->rangeleftopfamilies,
+ left_typecache->btree_opf);
+
+ restrictinfo->rangerightopfamilies =
+ lappend_oid(restrictinfo->rangerightopfamilies,
+ right_typecache->btree_opf);
+ }
+ else
+ {
+ List *opfamilies = get_rangejoin_opfamilies(opno);
+
+ restrictinfo->rangeleftopfamilies = opfamilies;
+ restrictinfo->rangerightopfamilies = opfamilies;
+
+ }
+}
+
/*
* check_hashjoinable
* If the restrictinfo's clause is hashjoinable, set the hashjoin
diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c
index 6ccec759bd..c309a3e114 100644
--- a/src/backend/optimizer/plan/setrefs.c
+++ b/src/backend/optimizer/plan/setrefs.c
@@ -2038,6 +2038,14 @@ set_join_references(PlannerInfo *root, Join *join, int rtoffset)
(Index) 0,
rtoffset,
NUM_EXEC_QUAL((Plan *) join));
+
+ mj->rangeclause = fix_join_expr(root,
+ mj->rangeclause,
+ outer_itlist,
+ inner_itlist,
+ (Index) 0,
+ rtoffset,
+ NUM_EXEC_QUAL((Plan *) join));
}
else if (IsA(join, HashJoin))
{
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index e53d381e19..f8490fa4ef 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -2502,6 +2502,7 @@ create_mergejoin_path(PlannerInfo *root,
List *pathkeys,
Relids required_outer,
List *mergeclauses,
+ List *rangeclause,
List *outersortkeys,
List *innersortkeys)
{
@@ -2530,6 +2531,7 @@ create_mergejoin_path(PlannerInfo *root,
pathnode->jpath.innerjoinpath = inner_path;
pathnode->jpath.joinrestrictinfo = restrict_clauses;
pathnode->path_mergeclauses = mergeclauses;
+ pathnode->path_rangeclause = rangeclause;
pathnode->outersortkeys = outersortkeys;
pathnode->innersortkeys = innersortkeys;
/* pathnode->skip_mark_restore will be set by final_cost_mergejoin */
@@ -4134,6 +4136,7 @@ do { \
REPARAMETERIZE_CHILD_PATH(jpath->innerjoinpath);
ADJUST_CHILD_ATTRS(jpath->joinrestrictinfo);
ADJUST_CHILD_ATTRS(mpath->path_mergeclauses);
+ ADJUST_CHILD_ATTRS(mpath->path_rangeclause);
new_path = (Path *) mpath;
}
break;
diff --git a/src/backend/optimizer/util/restrictinfo.c b/src/backend/optimizer/util/restrictinfo.c
index ebfcf91826..3051ad540b 100644
--- a/src/backend/optimizer/util/restrictinfo.c
+++ b/src/backend/optimizer/util/restrictinfo.c
@@ -201,6 +201,8 @@ make_restrictinfo_internal(PlannerInfo *root,
restrictinfo->outer_selec = -1;
restrictinfo->mergeopfamilies = NIL;
+ restrictinfo->rangeleftopfamilies = NIL;
+ restrictinfo->rangerightopfamilies = NIL;
restrictinfo->left_ec = NULL;
restrictinfo->right_ec = NULL;
diff --git a/src/backend/utils/adt/rangetypes.c b/src/backend/utils/adt/rangetypes.c
index 815175a654..3972480519 100644
--- a/src/backend/utils/adt/rangetypes.c
+++ b/src/backend/utils/adt/rangetypes.c
@@ -2507,6 +2507,66 @@ range_contains_elem_internal(TypeCacheEntry *typcache, const RangeType *r, Datum
return true;
}
+/*
+ * Test whether range r is right of a specific element value.
+ */
+bool
+elem_before_range_internal(TypeCacheEntry *typcache, Datum val, const RangeType *r)
+{
+ RangeBound lower;
+ RangeBound upper;
+ bool empty;
+ int32 cmp;
+
+ range_deserialize(typcache, r, &lower, &upper, &empty);
+
+ if (empty)
+ return true;
+
+ if (!lower.infinite)
+ {
+ cmp = DatumGetInt32(FunctionCall2Coll(&typcache->rng_cmp_proc_finfo,
+ typcache->rng_collation,
+ lower.val, val));
+ if (cmp > 0)
+ return true;
+ if (cmp == 0 && !lower.inclusive)
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * Test whether range r is left of a specific element value.
+ */
+bool
+elem_after_range_internal(TypeCacheEntry *typcache, Datum val, const RangeType *r)
+{
+ RangeBound lower;
+ RangeBound upper;
+ bool empty;
+ int32 cmp;
+
+ range_deserialize(typcache, r, &lower, &upper, &empty);
+
+ if (empty)
+ return true;
+
+ if (!upper.infinite)
+ {
+ cmp = DatumGetInt32(FunctionCall2Coll(&typcache->rng_cmp_proc_finfo,
+ typcache->rng_collation,
+ upper.val, val));
+ if (cmp < 0)
+ return true;
+ if (cmp == 0 && !upper.inclusive)
+ return true;
+ }
+
+ return false;
+}
+
/*
* datum_compute_size() and datum_write() are used to insert the bound
diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c
index 4ebaa552a2..f6cc0cdb8a 100644
--- a/src/backend/utils/cache/lsyscache.c
+++ b/src/backend/utils/cache/lsyscache.c
@@ -389,6 +389,40 @@ get_mergejoin_opfamilies(Oid opno)
return result;
}
+/*
+ * get_rangejoin_opfamilies
+ */
+List *
+get_rangejoin_opfamilies(Oid opno)
+{
+ List *result = NIL;
+ CatCList *catlist;
+ int i;
+
+ /*
+ * Search pg_amop to see if the target operator is registered as the "<",
+ * "<=", ">" or ">=" operator of any btree opfamily.
+ */
+ catlist = SearchSysCacheList1(AMOPOPID, ObjectIdGetDatum(opno));
+
+ for (i = 0; i < catlist->n_members; i++)
+ {
+ HeapTuple tuple = &catlist->members[i]->tuple;
+ Form_pg_amop aform = (Form_pg_amop) GETSTRUCT(tuple);
+
+ if (aform->amopmethod == BTREE_AM_OID &&
+ (aform->amopstrategy == BTLessStrategyNumber ||
+ aform->amopstrategy == BTLessEqualStrategyNumber ||
+ aform->amopstrategy == BTGreaterStrategyNumber ||
+ aform->amopstrategy == BTGreaterEqualStrategyNumber))
+ result = lappend_oid(result, aform->amopfamily);
+ }
+
+ ReleaseSysCacheList(catlist);
+
+ return result;
+}
+
/*
* get_compatible_hash_operators
* Get the OID(s) of hash equality operator(s) compatible with the given
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 2e8cbee69f..a682694a21 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1949,6 +1949,7 @@ typedef struct NestLoopState
*/
/* private in nodeMergejoin.c: */
typedef struct MergeJoinClauseData *MergeJoinClause;
+typedef struct RangeJoinData *RangeData;
typedef struct MergeJoinState
{
@@ -1970,6 +1971,8 @@ typedef struct MergeJoinState
TupleTableSlot *mj_NullInnerTupleSlot;
ExprContext *mj_OuterEContext;
ExprContext *mj_InnerEContext;
+ RangeData mj_RangeData;
+ bool mj_RangeJoin;
} MergeJoinState;
/* ----------------
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 186e89905b..c7d4431f06 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -1647,6 +1647,7 @@ typedef struct MergePath
{
JoinPath jpath;
List *path_mergeclauses; /* join clauses to be used for merge */
+ List *path_rangeclause; /* join clause to be used for range merge */
List *outersortkeys; /* keys for explicit sort, if any */
List *innersortkeys; /* keys for explicit sort, if any */
bool skip_mark_restore; /* can executor skip mark/restore? */
@@ -2102,6 +2103,8 @@ typedef struct RestrictInfo
/* valid if clause is mergejoinable, else NIL */
List *mergeopfamilies; /* opfamilies containing clause operator */
+ List *rangeleftopfamilies;
+ List *rangerightopfamilies;
/* cache space for mergeclause processing; NULL if not yet set */
EquivalenceClass *left_ec; /* EquivalenceClass containing lefthand */
@@ -2529,6 +2532,7 @@ typedef struct JoinPathExtraData
{
List *restrictlist;
List *mergeclause_list;
+ List *rangeclause_list;
bool inner_unique;
SpecialJoinInfo *sjinfo;
SemiAntiJoinFactors semifactors;
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index 01a246d50e..5fdc45faf9 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -754,6 +754,7 @@ typedef struct MergeJoin
Oid *mergeCollations; /* per-clause OIDs of collations */
int *mergeStrategies; /* per-clause ordering (ASC or DESC) */
bool *mergeNullsFirst; /* per-clause nulls ordering */
+ List *rangeclause; /* rangeclause as expression tree */
} MergeJoin;
/* ----------------
diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h
index f704d39980..365d8b809e 100644
--- a/src/include/optimizer/pathnode.h
+++ b/src/include/optimizer/pathnode.h
@@ -167,6 +167,7 @@ extern MergePath *create_mergejoin_path(PlannerInfo *root,
List *pathkeys,
Relids required_outer,
List *mergeclauses,
+ List *rangeclause,
List *outersortkeys,
List *innersortkeys);
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index f1d111063c..0acfd297eb 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -231,17 +231,27 @@ extern List *make_pathkeys_for_sortclauses(PlannerInfo *root,
List *tlist);
extern void initialize_mergeclause_eclasses(PlannerInfo *root,
RestrictInfo *restrictinfo);
+extern void initialize_rangeclause_eclasses(PlannerInfo *root,
+ RestrictInfo *restrictinfo);
extern void update_mergeclause_eclasses(PlannerInfo *root,
RestrictInfo *restrictinfo);
extern List *find_mergeclauses_for_outer_pathkeys(PlannerInfo *root,
List *pathkeys,
List *restrictinfos);
+extern List *find_rangeclauses_for_outer_pathkeys(PlannerInfo *root,
+ List *pathkeys,
+ List *mergeclauses,
+ List *rangeclauses);
extern List *select_outer_pathkeys_for_merge(PlannerInfo *root,
List *mergeclauses,
RelOptInfo *joinrel);
+extern List *select_outer_pathkeys_for_range(PlannerInfo *root,
+ List *rangeclauses);
extern List *make_inner_pathkeys_for_merge(PlannerInfo *root,
List *mergeclauses,
List *outer_pathkeys);
+extern PathKey *make_inner_pathkey_for_range(PlannerInfo *root,
+ List *rangeclause);
extern List *trim_mergeclauses_for_inner_pathkeys(PlannerInfo *root,
List *mergeclauses,
List *pathkeys);
diff --git a/src/include/utils/lsyscache.h b/src/include/utils/lsyscache.h
index 77871aaefc..a47beb8be5 100644
--- a/src/include/utils/lsyscache.h
+++ b/src/include/utils/lsyscache.h
@@ -79,6 +79,7 @@ extern bool get_ordering_op_properties(Oid opno,
extern Oid get_equality_op_for_ordering_op(Oid opno, bool *reverse);
extern Oid get_ordering_op_for_equality_op(Oid opno, bool use_lhs_type);
extern List *get_mergejoin_opfamilies(Oid opno);
+extern List *get_rangejoin_opfamilies(Oid opno);
extern bool get_compatible_hash_operators(Oid opno,
Oid *lhs_opno, Oid *rhs_opno);
extern bool get_op_hash_functions(Oid opno,
diff --git a/src/include/utils/rangetypes.h b/src/include/utils/rangetypes.h
index 04c302c619..51bdc5308b 100644
--- a/src/include/utils/rangetypes.h
+++ b/src/include/utils/rangetypes.h
@@ -95,6 +95,8 @@ typedef struct
*/
extern bool range_contains_elem_internal(TypeCacheEntry *typcache, const RangeType *r, Datum val);
+extern bool elem_before_range_internal(TypeCacheEntry *typcache, Datum val, const RangeType *r);
+extern bool elem_after_range_internal(TypeCacheEntry *typcache, Datum val, const RangeType *r);
/* internal versions of the above */
extern bool range_eq_internal(TypeCacheEntry *typcache, const RangeType *r1,
--
2.31.1