This is an automated email from the ASF dual-hosted git repository. joemcdonnell pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 563d5c50b82ed0765547aa7b57e0a04822b9e484 Author: skyyws <[email protected]> AuthorDate: Sat Nov 13 00:36:33 2021 +0800 IMPALA-7942 (part 2): Add query hints for predicate selectivities Currently, Impala only uses simple estimation to compute selectivity. For some predicates, this may lead to worse query plan due to CBO. This patch adds a new query hint: 'SELECTIVITY' to help specify a selectivity value for a predicate. The parser will interpret expressions wrapped in () followed by a C-style comment /* <predicate hint> */ as a predicate hint. The predicate hint currently can be in the form of +SELECTIVITY(f) where 'f' is a positive floating point number, in the range of (0, 1], to use as the selectivity for the preceding expression. Single predicate example: select col from t where (a=1) /* +SELECTIVITY(0.5) */; Compound predicate example: select col from t where (a=1 or b=2) /* +SELECTIVITY(0.5) */; As a limitation of this path, the selectivity hints for 'AND' compound predicates, either in the original SQL query or internally generated, are ignored. We may supported this in the near future. Testing: - Added new fe tests in 'PlannerTest' - Added new fe tests in 'AnalyzeStmtsTest' for negative cases Change-Id: I2776b9bbd878b8a21d9c866b400140a454f59e1b Reviewed-on: http://gerrit.cloudera.org:8080/18023 Tested-by: Impala Public Jenkins <[email protected]> Reviewed-by: Qifan Chen <[email protected]> --- fe/src/main/cup/sql-parser.cup | 28 ++- .../apache/impala/analysis/BinaryPredicate.java | 4 + .../apache/impala/analysis/CompoundPredicate.java | 18 +- .../org/apache/impala/analysis/InPredicate.java | 3 + .../apache/impala/analysis/IsNullPredicate.java | 3 + .../java/org/apache/impala/analysis/Predicate.java | 38 +++ fe/src/main/jflex/sql-scanner.flex | 1 + .../apache/impala/analysis/AnalyzeStmtsTest.java | 55 +++++ .../org/apache/impala/planner/PlannerTest.java | 9 + .../PlannerTest/predicate-selectivity-hint.test | 262 +++++++++++++++++++++ 10 files changed, 415 insertions(+), 6 deletions(-) diff --git a/fe/src/main/cup/sql-parser.cup b/fe/src/main/cup/sql-parser.cup index b136a4631..9a2de413c 100755 --- a/fe/src/main/cup/sql-parser.cup +++ b/fe/src/main/cup/sql-parser.cup @@ -325,7 +325,7 @@ terminal KW_RANGE, KW_RCFILE, KW_RECOVER, KW_REFERENCES, KW_REFRESH, KW_REGEXP, KW_RELY, KW_RENAME, KW_REPEATABLE, KW_REPLACE, KW_REPLICATION, KW_RESTRICT, KW_RETURNS, KW_REVOKE, KW_RIGHT, KW_RLIKE, KW_ROLE, KW_ROLES, KW_ROLLUP, KW_ROW, KW_ROWS, KW_RWSTORAGE, KW_SCHEMA, - KW_SCHEMAS, KW_SELECT, KW_SEMI, KW_SEQUENCEFILE, KW_SERDEPROPERTIES, KW_SERIALIZE_FN, + KW_SCHEMAS, KW_SELECT, KW_SELECTIVITY, KW_SEMI, KW_SEQUENCEFILE, KW_SERDEPROPERTIES, KW_SERIALIZE_FN, KW_SET, KW_SHOW, KW_SMALLINT, KW_SETS, KW_SORT, KW_SPEC, KW_STORAGE_HANDLER_URI, KW_STORED, KW_STRAIGHT_JOIN, KW_STRING, KW_STRUCT, KW_SYMBOL, KW_SYSTEM_TIME, KW_SYSTEM_VERSION, KW_TABLE, KW_TABLES, KW_TABLESAMPLE, KW_TBLPROPERTIES, @@ -454,7 +454,7 @@ nonterminal TimeTravelSpec opt_asof; nonterminal Subquery subquery; nonterminal JoinOperator join_operator; nonterminal opt_inner, opt_outer; -nonterminal PlanHint plan_hint; +nonterminal PlanHint plan_hint, selectivity_hint; nonterminal List<PlanHint> plan_hints, opt_plan_hints, plan_hint_list; nonterminal TypeDef type_def; nonterminal Type type; @@ -654,6 +654,9 @@ precedence left KW_INTO; precedence left KW_OVER; precedence left KW_HASH; +// Avoid shift/reduce conflicts for supporting selectivity hint after predicates. +precedence left COMMENTED_PLAN_HINT_START; + start with stmt; stmt ::= @@ -3400,6 +3403,19 @@ expr_list ::= :} ; +// Currently, 'selectivity' hint can be set for some predicates. +// It's used to replace the selectivity estimated by the planner. +selectivity_hint ::= + COMMENTED_PLAN_HINT_START KW_SELECTIVITY LPAREN DECIMAL_LITERAL:value RPAREN + COMMENTED_PLAN_HINT_END + {: + RESULT = new PlanHint("selectivity", + new ArrayList(Arrays.asList(value.toString()))); + :} + | /* empty */ + {: RESULT = null; :} + ; + // Currently, we allow predicate hints for the top level WHERE // clause. An attempt was made to set this for individual exprs // (as part of the 'expr' grammar) but that generated quite a @@ -3888,8 +3904,12 @@ predicate ::= {: RESULT = p; :} | bool_test_expr:e {: RESULT = e; :} - | LPAREN predicate:p RPAREN + | LPAREN predicate:p RPAREN selectivity_hint:hint {: + if (p instanceof Predicate && hint != null) { + Predicate predicate = (Predicate) p; + predicate.setSelectivityHint(Double.valueOf(hint.getArgs().get(0))); + } p.setPrintSqlInParens(true); RESULT = p; :} @@ -4457,6 +4477,8 @@ word ::= {: RESULT = r.toString(); :} | KW_SELECT:r {: RESULT = r.toString(); :} + | KW_SELECTIVITY:r + {: RESULT = r.toString(); :} | KW_SEMI:r {: RESULT = r.toString(); :} | KW_SEQUENCEFILE:r diff --git a/fe/src/main/java/org/apache/impala/analysis/BinaryPredicate.java b/fe/src/main/java/org/apache/impala/analysis/BinaryPredicate.java index 22c8730e5..8e5b147ff 100644 --- a/fe/src/main/java/org/apache/impala/analysis/BinaryPredicate.java +++ b/fe/src/main/java/org/apache/impala/analysis/BinaryPredicate.java @@ -245,6 +245,10 @@ public class BinaryPredicate extends Predicate { protected void computeSelectivity() { // TODO: Compute selectivity for nested predicates. // TODO: Improve estimation using histograms. + if (hasValidSelectivityHint()) { + return; + } + Reference<SlotRef> slotRefRef = new Reference<SlotRef>(); if (!isSingleColumnPredicate(slotRefRef, null)) { return; diff --git a/fe/src/main/java/org/apache/impala/analysis/CompoundPredicate.java b/fe/src/main/java/org/apache/impala/analysis/CompoundPredicate.java index 665bf11f8..0dee5f71e 100644 --- a/fe/src/main/java/org/apache/impala/analysis/CompoundPredicate.java +++ b/fe/src/main/java/org/apache/impala/analysis/CompoundPredicate.java @@ -143,10 +143,22 @@ public class CompoundPredicate extends Predicate { Preconditions.checkState(fn_.getReturnType().isBoolean()); castForFunctionCall(false, analyzer.isDecimalV2()); - computeSelectivity(); - } + computeSelectivity(analyzer); + } + + protected void computeSelectivity(Analyzer analyzer) { + if (hasValidSelectivityHint()) { + // TODO: Support selectivity hint for 'AND' compound predicates. + if (this.getOp() == Operator.AND) { + // 'AND' compound predicates will be replaced by children in Expr#getConjuncts, + // so selectivity hint will be missing, we add a warning here. + analyzer.addWarning("Selectivity hints are ignored for 'AND' compound " + + "predicates, either in the SQL query or internally generated."); + } else { + return; + } + } - protected void computeSelectivity() { if (!getChild(0).hasSelectivity() || (children_.size() == 2 && !getChild(1).hasSelectivity())) { // Give up if one of our children has an unknown selectivity. diff --git a/fe/src/main/java/org/apache/impala/analysis/InPredicate.java b/fe/src/main/java/org/apache/impala/analysis/InPredicate.java index bd0d8a7d0..e576b754d 100644 --- a/fe/src/main/java/org/apache/impala/analysis/InPredicate.java +++ b/fe/src/main/java/org/apache/impala/analysis/InPredicate.java @@ -173,6 +173,9 @@ public class InPredicate extends Predicate { } protected void computeSelectivity() { + if (hasValidSelectivityHint()) { + return; + } // TODO: Fix selectivity_ for nested predicate Reference<SlotRef> slotRefRef = new Reference<SlotRef>(); Reference<Integer> idxRef = new Reference<Integer>(); diff --git a/fe/src/main/java/org/apache/impala/analysis/IsNullPredicate.java b/fe/src/main/java/org/apache/impala/analysis/IsNullPredicate.java index a9e62c59d..7c828a6e6 100644 --- a/fe/src/main/java/org/apache/impala/analysis/IsNullPredicate.java +++ b/fe/src/main/java/org/apache/impala/analysis/IsNullPredicate.java @@ -140,6 +140,9 @@ public class IsNullPredicate extends Predicate { } protected void computeSelectivity() { + if (hasValidSelectivityHint()) { + return; + } // TODO: increase this to make sure we don't end up favoring broadcast joins // due to underestimated cardinalities? Reference<SlotRef> slotRefRef = new Reference<SlotRef>(); diff --git a/fe/src/main/java/org/apache/impala/analysis/Predicate.java b/fe/src/main/java/org/apache/impala/analysis/Predicate.java index 64ee408e9..df3c3fc0f 100644 --- a/fe/src/main/java/org/apache/impala/analysis/Predicate.java +++ b/fe/src/main/java/org/apache/impala/analysis/Predicate.java @@ -30,11 +30,18 @@ public abstract class Predicate extends Expr { // cache prior shouldConvertToCNF checks to avoid repeat tree walking // omitted from clone in case cloner plans to mutate the expr protected Optional<Boolean> shouldConvertToCNF_ = Optional.empty(); + // Reserve 'SELECTIVITY' hint value from query to replace original selectivity + // computing in sql analysis phase. + // Default value is -1.0, means no selectivity hint set. + // The allowed values is (0,1], 1 means all records are eligible, 0 is not allowed, + // 0 makes no sense for a query. + protected double selectivityHint_; public Predicate() { super(); isEqJoinConjunct_ = false; hasAlwaysTrueHint_ = false; + selectivityHint_ = -1.0; } /** @@ -44,6 +51,7 @@ public abstract class Predicate extends Expr { super(other); isEqJoinConjunct_ = other.isEqJoinConjunct_; hasAlwaysTrueHint_ = other.hasAlwaysTrueHint_; + selectivityHint_ = other.selectivityHint_; } public void setIsEqJoinConjunct(boolean v) { isEqJoinConjunct_ = v; } @@ -55,6 +63,26 @@ public abstract class Predicate extends Expr { // values: true/false/null numDistinctValues_ = 3; analyzeHints(analyzer); + + analyzeSelectivityHint(analyzer); + } + + /** + * Set selectivity_ if this predicate has a selectivity hint, and value is legal. + * Otherwise, Impala will print a warning msg, and ignore this hint value. + */ + protected void analyzeSelectivityHint(Analyzer analyzer) { + if (selectivityHint_ >= 0) { + // If we set a negative number in selectivity hint, the query will throw + // 'Syntax error' exception directly, so the 'selectivityHint_' is always larger + // than or equal to zero here. + if (selectivityHint_ == 0 || selectivityHint_ > 1.0) { + analyzer.addWarning("Invalid selectivity hint value: " + selectivityHint_ + + ", allowed value should be a double value in (0, 1]."); + } else { + selectivity_ = selectivityHint_; + } + } } /** @@ -135,4 +163,14 @@ public abstract class Predicate extends Expr { public boolean hasAlwaysTrueHint() { return hasAlwaysTrueHint_; } + public void setSelectivityHint(double selectivityHint) { + this.selectivityHint_ = selectivityHint; + } + + /** + * Valid selectivity hint is (0,1], return true if hint value is in the range. + */ + public boolean hasValidSelectivityHint() { + return selectivityHint_ > 0 && selectivityHint_ <= 1.0; + } } diff --git a/fe/src/main/jflex/sql-scanner.flex b/fe/src/main/jflex/sql-scanner.flex index 2102e0d1b..ef01747c0 100644 --- a/fe/src/main/jflex/sql-scanner.flex +++ b/fe/src/main/jflex/sql-scanner.flex @@ -240,6 +240,7 @@ import org.apache.impala.thrift.TReservedWordsVersion; keywordMap.put("schema", SqlParserSymbols.KW_SCHEMA); keywordMap.put("schemas", SqlParserSymbols.KW_SCHEMAS); keywordMap.put("select", SqlParserSymbols.KW_SELECT); + keywordMap.put("selectivity", SqlParserSymbols.KW_SELECTIVITY); keywordMap.put("semi", SqlParserSymbols.KW_SEMI); keywordMap.put("sequencefile", SqlParserSymbols.KW_SEQUENCEFILE); keywordMap.put("serdeproperties", SqlParserSymbols.KW_SERDEPROPERTIES); diff --git a/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java b/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java index 8aefe1ff9..4e5aaba8f 100644 --- a/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java +++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java @@ -5124,4 +5124,59 @@ public class AnalyzeStmtsTest extends AnalyzerTest { "Table hint not recognized for table " + "functional_hbase.alltypes: TABLE_NUM_ROWS(100)"); } + + @Test + public void testSelectivityHintNegative() { + // Selectivity hint must use bracket, even for single predicate + AnalysisError("select * from t1 where a > 1 and b > 2 /* +SELECTIVITY(0.1) */", + "Syntax error in line 1"); + AnalysisError("select * from t1 where a > 1 /* +SELECTIVITY(0.1) */", + "Syntax error in line 1"); + + // Cannot set selectivity hint exists predicate + AnalysisError("select * from t1 where exists (select x from t2) " + + "/* +SELECTIVITY(0.1) */", + "Syntax error in line 1"); + + // Selectivity hint only accept one parameter with decimal type + // Negative number and zero are not allowed + AnalysisError("select * from t1 where (a > 1) /* +SELECTIVITY('0.1') */", + "Syntax error in line 1"); + AnalysisError("select * from t1 where (a > 1) /* +SELECTIVITY(0) */", + "Syntax error in line 1"); + AnalysisError("select * from t1 where (a > 1) /* +SELECTIVITY(-1.0) */", + "Syntax error in line 1"); + AnalysisError("select * from t1 where (a > 1) /* +SELECTIVITY(0.1, 0.2) */", + "Syntax error in line 1"); + AnalysisError("select * from t1 where (a > 1) /* +SELECTIVITY(1/3) */", + "Syntax error in line 1"); + } + + @Test + public void testSelectivityHintPositive() { + // Selectivity hint legal value is (0,1] + AnalyzesOk("select * from tpch.lineitem where (l_shipdate <= '1998-09-02') " + + "/* +SELECTIVITY(1.1) */", + "Invalid selectivity hint value: 1.1, allowed value should be a double value in " + + "(0, 1]."); + AnalyzesOk("select * from tpch.lineitem where (l_shipdate <= '1998-09-02') " + + "/* +SELECTIVITY(0.0) */", + "Invalid selectivity hint value: 0.0, allowed value should be a double value in " + + "(0, 1]."); + + // Also valid for a very long decimal value + AnalyzesOk("select * from functional.alltypes where (id > 1000)" + + "/* +SELECTIVITY(0.3333333333333333333333333333333333) */"); + // Set selectivity hint for compound predicate + AnalyzesOk("select * from functional.alltypes where (id > 1000 and int_col = 1)" + + "/* +SELECTIVITY(0.1) */"); + AnalyzesOk("select * from functional.alltypes where (id > 1000 or int_col = 1)" + + "/* +SELECTIVITY(0.1) */"); + + // Selectivity hint is invalid for 'AND' compound predicate. + AnalyzesOk("select * from tpch.lineitem where (l_shipdate <= '1998-09-02' and " + + "l_shipdate >= '1997-09-02')/* +SELECTIVITY(0.5) */", + "Selectivity hints are ignored for 'AND' compound predicates, either in the SQL " + + "query or internally generated."); + } } diff --git a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java index 920e684ba..54eb40393 100644 --- a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java +++ b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java @@ -1366,4 +1366,13 @@ public class PlannerTest extends PlannerTestBase { PlannerTestOption.VALIDATE_RESOURCES, PlannerTestOption.VALIDATE_CARDINALITY)); } + + /** + * Test SELECTIVITY hints + */ + @Test + public void testPredicateSelectivityHints() { + runPlannerTestFile("predicate-selectivity-hint", + ImmutableSet.of(PlannerTestOption.VALIDATE_CARDINALITY)); + } } diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/predicate-selectivity-hint.test b/testdata/workloads/functional-planner/queries/PlannerTest/predicate-selectivity-hint.test new file mode 100644 index 000000000..f31f8e10e --- /dev/null +++ b/testdata/workloads/functional-planner/queries/PlannerTest/predicate-selectivity-hint.test @@ -0,0 +1,262 @@ +# Table 'tpch.lineitem' has 6001215 rows, so the scan on it has cardinality as 6.00M. +# If the selectivity of the predicate is 0.1, cardinality in '00:SCAN HDFS' is 6001215 * 0.1 = 600.12K +# Simple 'BinaryPredicate' example without selectivity hint +# Planner assigns the default selectivity (0.1) to this predicate +select * from tpch.lineitem where l_shipdate <= '1998-09-02' +---- PLAN +PLAN-ROOT SINK +| +00:SCAN HDFS [tpch.lineitem] + HDFS partitions=1/1 files=1 size=718.94MB + predicates: l_shipdate <= '1998-09-02' + row-size=231B cardinality=600.12K +==== +# Simple 'BinaryPredicate' example with selectivity hint +# Since almost 98% of the values are less than '1998-09-02', we set selectivity manually for this predicate +select * from tpch.lineitem where (l_shipdate <= '1998-09-02')/* +SELECTIVITY(0.98) */ +---- PLAN +PLAN-ROOT SINK +| +00:SCAN HDFS [tpch.lineitem] + HDFS partitions=1/1 files=1 size=718.94MB + predicates: (l_shipdate <= '1998-09-02') + row-size=231B cardinality=5.88M +==== +# Simple 'InPredicate' case without selectivity hint +# This predicate selectivity is 3/distinctValue, almost 0.12% +select * from tpch.lineitem where l_shipdate IN ('1998-09-01', '1998-09-02', '1998-09-03') +---- PLAN +PLAN-ROOT SINK +| +00:SCAN HDFS [tpch.lineitem] + HDFS partitions=1/1 files=1 size=718.94MB + predicates: l_shipdate IN ('1998-09-01', '1998-09-02', '1998-09-03') + row-size=231B cardinality=6.85K +==== +# Simple 'InPredicate' case with selectivity hint +# This predicate selectivity is 3/distinctValue, almost 0.12% +# We assume that this predicate actual selectivity is 0.5 for testing, and set hint manually +select * from tpch.lineitem +where (l_shipdate IN ('1998-09-01', '1998-09-02', '1998-09-03'))/* +SELECTIVITY(0.5) */ +---- PLAN +PLAN-ROOT SINK +| +00:SCAN HDFS [tpch.lineitem] + HDFS partitions=1/1 files=1 size=718.94MB + predicates: (l_shipdate IN ('1998-09-01', '1998-09-02', '1998-09-03')) + row-size=231B cardinality=3.00M +==== +# Simple 'IsNullPredicate' case without selectivity hint +# This predicate selectivity is: getStats().getNumNulls() / numRows +# There are no null values in 'l_shipdate' column, so this selectivity is 0 +select * from tpch.lineitem where l_shipdate IS NULL +---- PLAN +PLAN-ROOT SINK +| +00:SCAN HDFS [tpch.lineitem] + HDFS partitions=1/1 files=1 size=718.94MB + predicates: l_shipdate IS NULL + row-size=231B cardinality=1 +==== +# Simple 'IsNullPredicate' case with selectivity hint +# Assuming the predicate has 0.5 as the selectivity by using the hint +select * from tpch.lineitem where (l_shipdate IS NULL)/* +SELECTIVITY(0.5) */ +---- PLAN +PLAN-ROOT SINK +| +00:SCAN HDFS [tpch.lineitem] + HDFS partitions=1/1 files=1 size=718.94MB + predicates: (l_shipdate IS NULL) + row-size=231B cardinality=3.00M +==== +# Simple 'LikePredicate' example without selectivity hint +# Planner will assign the default selectivity (0.1) on this predicate +select * from tpch.lineitem where l_shipdate LIKE '1998-%' +---- PLAN +PLAN-ROOT SINK +| +00:SCAN HDFS [tpch.lineitem] + HDFS partitions=1/1 files=1 size=718.94MB + predicates: l_shipdate LIKE '1998-%' + row-size=231B cardinality=600.12K +==== +# Simple 'LikePredicate' example with selectivity hint +# The actual selectivity of this predicate is around 11.5%. Set it by the hint manually. +select * from tpch.lineitem where (l_shipdate LIKE '1998-%')/* +SELECTIVITY(0.115) */ +---- PLAN +PLAN-ROOT SINK +| +00:SCAN HDFS [tpch.lineitem] + HDFS partitions=1/1 files=1 size=718.94MB + predicates: (l_shipdate LIKE '1998-%') + row-size=231B cardinality=690.14K +==== +# Simple not 'LikePredicate' example without selectivity hint +# Planner will assign the default selectivity (0.1) on this predicate +select * from tpch.lineitem where l_shipdate NOT LIKE '1998-%' +---- PLAN +PLAN-ROOT SINK +| +00:SCAN HDFS [tpch.lineitem] + HDFS partitions=1/1 files=1 size=718.94MB + predicates: NOT l_shipdate LIKE '1998-%' + row-size=231B cardinality=600.12K +==== +# Simple not 'LikePredicate' example with selectivity hint +# The actual selectivity of this LIKE predicate is around 11.5% (same as the above one). +# So the selectivity of the corresponding NOT LIKE predicate is 88.5% +select * from tpch.lineitem where (l_shipdate NOT LIKE '1998-%')/* +SELECTIVITY(0.885) */ +---- PLAN +PLAN-ROOT SINK +| +00:SCAN HDFS [tpch.lineitem] + HDFS partitions=1/1 files=1 size=718.94MB + predicates: (NOT l_shipdate LIKE '1998-%') + row-size=231B cardinality=5.31M +==== +# Simple 'BetweenPredicate' example without selectivity hint +# Planner will assign the default selectivity (0.1) on this predicate +select * from tpch.lineitem where l_shipdate BETWEEN '1998-09-01' AND '1998-09-03' +---- PLAN +PLAN-ROOT SINK +| +00:SCAN HDFS [tpch.lineitem] + HDFS partitions=1/1 files=1 size=718.94MB + predicates: l_shipdate <= '1998-09-03', l_shipdate >= '1998-09-01' + row-size=231B cardinality=600.12K +==== +# Selectivity hint is invalid for BetweenPredicate currently +select * from tpch.lineitem +where (l_shipdate BETWEEN '1998-09-01' AND '1998-09-03')/* +SELECTIVITY(0.5)*/ +---- PLAN +PLAN-ROOT SINK +| +00:SCAN HDFS [tpch.lineitem] + HDFS partitions=1/1 files=1 size=718.94MB + predicates: l_shipdate <= '1998-09-03', l_shipdate >= '1998-09-01' + row-size=231B cardinality=600.12K +==== +# Test for compound predicates +# Planner will assign the default selectivity (0.1) on this predicate +select * from tpch.lineitem where l_shipdate <= '1998-09-02' and l_shipdate >= '1997-09-02' +---- PLAN +PLAN-ROOT SINK +| +00:SCAN HDFS [tpch.lineitem] + HDFS partitions=1/1 files=1 size=718.94MB + predicates: l_shipdate <= '1998-09-02', l_shipdate >= '1997-09-02' + row-size=231B cardinality=600.12K +==== +# Selectivity hint for 'AND' compound predicate is invalid +select * from tpch.lineitem +where (l_shipdate <= '1998-09-02' and l_shipdate >= '1997-09-02')/* +SELECTIVITY(0.5) */ +---- PLAN +PLAN-ROOT SINK +| +00:SCAN HDFS [tpch.lineitem] + HDFS partitions=1/1 files=1 size=718.94MB + predicates: l_shipdate <= '1998-09-02', l_shipdate >= '1997-09-02' + row-size=231B cardinality=600.12K +==== +# Test for compound predicate, with 'OR' +# Planner will assign the default selectivity (0.1) on this predicate +select * from tpch.lineitem +where (l_shipdate <= '1998-09-02' or l_shipdate >= '1997-09-02') +---- PLAN +PLAN-ROOT SINK +| +00:SCAN HDFS [tpch.lineitem] + HDFS partitions=1/1 files=1 size=718.94MB + predicates: (l_shipdate <= '1998-09-02' OR l_shipdate >= '1997-09-02') + row-size=231B cardinality=600.12K +==== +# Set selectivity hint for compound predicate, with 'OR' +select * from tpch.lineitem +where (l_shipdate <= '1998-09-02' or l_shipdate >= '1997-09-02')/* +SELECTIVITY(0.5) */ +---- PLAN +PLAN-ROOT SINK +| +00:SCAN HDFS [tpch.lineitem] + HDFS partitions=1/1 files=1 size=718.94MB + predicates: (l_shipdate <= '1998-09-02' OR l_shipdate >= '1997-09-02') + row-size=231B cardinality=3.00M +==== +# Set selectivity hint for each single predicate also valid for compound predicate +select * from tpch.lineitem +where (l_shipdate <= '1998-09-02')/* +SELECTIVITY(0.5) */ and (l_shipdate >= '1997-09-02')/* +SELECTIVITY(0.5) */ +---- PLAN +PLAN-ROOT SINK +| +00:SCAN HDFS [tpch.lineitem] + HDFS partitions=1/1 files=1 size=718.94MB + predicates: (l_shipdate <= '1998-09-02'), (l_shipdate >= '1997-09-02') + row-size=231B cardinality=2.12M +==== +# A simple example to show that selectivity hint can help change join mode and be used as an optimization tool. +# This query is from tpch-3.sql +# The original join is: lineitem JOIN(PARTITIONED) orders JOIN(BROADCAST) customer +# For predicate o_orderdate < date '1995-03-15', the planner assigns the default selectivity (0.1) to it +# If we assume the actual selectivity is 0.01, and add selectivity +# hint manually, the new join becomes: +# lineitem JOIN(BROADCAST) orders JOIN(BROADCAST) customer +select + l_orderkey, + sum(l_extendedprice * (1 - l_discount)) as revenue, + o_orderdate, + o_shippriority +from + tpch.customer, + tpch.orders, + tpch.lineitem +where + c_mktsegment = 'BUILDING' + and c_custkey = o_custkey + and l_orderkey = o_orderkey + and (o_orderdate < date '1995-03-15') /* +SELECTIVITY(0.01) */ + and l_shipdate > date '1995-03-15' +group by + l_orderkey, + o_orderdate, + o_shippriority +order by + revenue desc, + o_orderdate; +---- PLAN +PLAN-ROOT SINK +| +06:SORT +| order by: sum(l_extendedprice * (1 - l_discount)) DESC, o_orderdate ASC +| row-size=50B cardinality=1.76K +| +05:AGGREGATE [FINALIZE] +| output: sum(l_extendedprice * (1 - l_discount)) +| group by: l_orderkey, o_orderdate, o_shippriority +| row-size=50B cardinality=1.76K +| +04:HASH JOIN [INNER JOIN] +| hash predicates: c_custkey = o_custkey +| runtime filters: RF000 <- o_custkey +| row-size=117B cardinality=1.76K +| +|--03:HASH JOIN [INNER JOIN] +| | hash predicates: l_orderkey = o_orderkey +| | runtime filters: RF002 <- o_orderkey +| | row-size=88B cardinality=5.76K +| | +| |--01:SCAN HDFS [tpch.orders] +| | HDFS partitions=1/1 files=1 size=162.56MB +| | predicates: (o_orderdate < DATE '1995-03-15') +| | row-size=42B cardinality=15.00K +| | +| 02:SCAN HDFS [tpch.lineitem] +| HDFS partitions=1/1 files=1 size=718.94MB +| predicates: l_shipdate > DATE '1995-03-15' +| runtime filters: RF002 -> l_orderkey +| row-size=46B cardinality=600.12K +| +00:SCAN HDFS [tpch.customer] + HDFS partitions=1/1 files=1 size=23.08MB + predicates: c_mktsegment = 'BUILDING' + runtime filters: RF000 -> c_custkey + row-size=29B cardinality=30.00K +====
