This is an automated email from the ASF dual-hosted git repository.
stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push:
new 4764b91f4 IMPALA-12965: Add debug query option
RUNTIME_FILTER_IDS_TO_SKIP
4764b91f4 is described below
commit 4764b91f42820e0cd114268592e090285ea79b3c
Author: Riza Suminto <[email protected]>
AuthorDate: Tue Apr 2 11:06:54 2024 -0700
IMPALA-12965: Add debug query option RUNTIME_FILTER_IDS_TO_SKIP
Runtime filters still have a negative effect in certain scenarios, such as
a long wait time that delays scans and a cascading runtime filter chain
that prevents parallel execution of fragments. Having a debug query option
to simply skip a runtime filter id from being scheduled can help us
investigate and test a solution early, before implementing the
improvement code.
This patch adds the RUNTIME_FILTER_IDS_TO_SKIP option to do that. This
patch also improves parsing of multi-value query options so that it does
not split at a ',' char that is within two double quotes, and so that it
ignores empty/whitespace values if any exist.
Testing:
- Add BE test in query-options-test.cc
- Add FE test in runtime-filter-query-options.test
Change-Id: I897e37685dd1ec279989b55560ec7616a00d2280
Reviewed-on: http://gerrit.cloudera.org:8080/21230
Reviewed-by: Impala Public Jenkins <[email protected]>
Tested-by: Impala Public Jenkins <[email protected]>
---
be/src/service/child-query.cc | 4 +
be/src/service/query-options-test.cc | 116 +++++++++++++-
be/src/service/query-options.cc | 64 +++++++-
be/src/service/query-options.h | 9 +-
common/thrift/ImpalaService.thrift | 6 +
common/thrift/Query.thrift | 3 +
.../impala/planner/RuntimeFilterGenerator.java | 15 +-
.../org/apache/impala/planner/PlannerTest.java | 1 -
.../PlannerTest/runtime-filter-query-options.test | 172 +++++++++++++++++++++
9 files changed, 375 insertions(+), 15 deletions(-)
diff --git a/be/src/service/child-query.cc b/be/src/service/child-query.cc
index cb557cbc0..a1eb77cd3 100644
--- a/be/src/service/child-query.cc
+++ b/be/src/service/child-query.cc
@@ -145,6 +145,10 @@ void PrintQueryOptionValue(const
set<impala::TRuntimeFilterType::type>& filter_t
val << filter_types;
}
+void PrintQueryOptionValue(const std::set<int32_t>& filter_ids, stringstream&
val) {
+ val << filter_ids;
+}
+
void ChildQuery::SetQueryOptions(TExecuteStatementReq* exec_stmt_req) {
map<string, string> conf;
const TQueryOptions& parent_options =
diff --git a/be/src/service/query-options-test.cc
b/be/src/service/query-options-test.cc
index f396f3e6a..ca2072a45 100644
--- a/be/src/service/query-options-test.cc
+++ b/be/src/service/query-options-test.cc
@@ -448,6 +448,22 @@ TEST(QueryOptions, SetSpecialOptions) {
}
}
+void VerifyFilterTypes(const set<TRuntimeFilterType::type>& types,
+ const std::initializer_list<TRuntimeFilterType::type>& expects) {
+ EXPECT_EQ(expects.size(), types.size());
+ for (const auto t : expects) {
+ EXPECT_NE(types.end(), types.find(t));
+ }
+}
+
+void VerifyFilterIds(
+ const set<int32_t>& types, const std::initializer_list<int32_t>& expects) {
+ EXPECT_EQ(expects.size(), types.size());
+ for (const auto t : expects) {
+ EXPECT_NE(types.end(), types.find(t));
+ }
+}
+
TEST(QueryOptions, ParseQueryOptions) {
QueryOptionsMask expectedMask;
expectedMask.set(TImpalaQueryOptions::NUM_NODES);
@@ -473,6 +489,36 @@ TEST(QueryOptions, ParseQueryOptions) {
EXPECT_FALSE(status.ok());
EXPECT_EQ(status.msg().details().size(), 2);
}
+
+ QueryOptionsMask expectedMask2;
+ expectedMask2.set(TImpalaQueryOptions::ENABLED_RUNTIME_FILTER_TYPES);
+ expectedMask2.set(TImpalaQueryOptions::RUNTIME_FILTER_IDS_TO_SKIP);
+
+ {
+ TQueryOptions options;
+ QueryOptionsMask mask;
+ Status status =
ParseQueryOptions("enabled_runtime_filter_types=\"bloom,min_max\","
+ "runtime_filter_ids_to_skip=\"1,2\"",
+ &options, &mask);
+ VerifyFilterTypes(options.enabled_runtime_filter_types,
+ {TRuntimeFilterType::BLOOM, TRuntimeFilterType::MIN_MAX});
+ VerifyFilterIds(options.runtime_filter_ids_to_skip, {1, 2});
+ EXPECT_EQ(mask, expectedMask2);
+ EXPECT_TRUE(status.ok());
+ }
+
+ {
+ TQueryOptions options;
+ QueryOptionsMask mask;
+ Status status =
ParseQueryOptions("enabled_runtime_filter_types=bloom,min_max,"
+ "runtime_filter_ids_to_skip=1,2",
+ &options, &mask);
+ VerifyFilterTypes(options.enabled_runtime_filter_types,
{TRuntimeFilterType::BLOOM});
+ VerifyFilterIds(options.runtime_filter_ids_to_skip, {1});
+ EXPECT_EQ(mask, expectedMask2);
+ EXPECT_FALSE(status.ok());
+ EXPECT_EQ(status.msg().details().size(), 2);
+ }
}
TEST(QueryOptions, MapOptionalDefaultlessToEmptyString) {
@@ -602,14 +648,6 @@ TEST(QueryOptions, CompressionCodec) {
#undef ENTRY
}
-void VerifyFilterTypes(const set<TRuntimeFilterType::type>& types,
- const std::initializer_list<TRuntimeFilterType::type>& expects) {
- EXPECT_EQ(expects.size(), types.size());
- for (const auto t : expects) {
- EXPECT_NE(types.end(), types.find(t));
- }
-}
-
// Tests for setting of ENABLED_RUNTIME_FILTER_TYPES.
TEST(QueryOptions, EnabledRuntimeFilterTypes) {
const string KEY = "enabled_runtime_filter_types";
@@ -647,6 +685,15 @@ TEST(QueryOptions, EnabledRuntimeFilterTypes) {
TRuntimeFilterType::MIN_MAX
});
}
+ {
+ TQueryOptions options;
+ EXPECT_TRUE(SetQueryOption(KEY, "bloom , , min_max", &options,
nullptr).ok());
+ VerifyFilterTypes(options.enabled_runtime_filter_types,
+ {
+ TRuntimeFilterType::BLOOM,
+ TRuntimeFilterType::MIN_MAX
+ });
+ }
{
TQueryOptions options;
EXPECT_TRUE(SetQueryOption(KEY, "in_list,bloom", &options, nullptr).ok());
@@ -658,6 +705,59 @@ TEST(QueryOptions, EnabledRuntimeFilterTypes) {
}
}
+// Tests for setting of RUNTIME_FILTER_IDS_TO_SKIP.
+TEST(QueryOptions, RuntimeFilterIdsToSkip) {
+ const string KEY = "runtime_filter_ids_to_skip";
+ {
+ TQueryOptions options;
+ EXPECT_TRUE(SetQueryOption(KEY, "0", &options, nullptr).ok());
+ VerifyFilterIds(options.runtime_filter_ids_to_skip, {0});
+ }
+ {
+ TQueryOptions options;
+ EXPECT_TRUE(SetQueryOption(KEY, "0,1", &options, nullptr).ok());
+ VerifyFilterIds(options.runtime_filter_ids_to_skip, {0, 1});
+ }
+ {
+ TQueryOptions options;
+ EXPECT_TRUE(SetQueryOption(KEY, "111,2,33", &options, nullptr).ok());
+ VerifyFilterIds(options.runtime_filter_ids_to_skip, {2, 33, 111});
+ }
+ {
+ TQueryOptions options;
+ EXPECT_TRUE(SetQueryOption(KEY, "-1,0,1", &options, nullptr).ok());
+ VerifyFilterIds(options.runtime_filter_ids_to_skip, {-1, 0, 1});
+ }
+ {
+ TQueryOptions options;
+ EXPECT_TRUE(SetQueryOption(KEY, "1,6.9", &options, nullptr).ok());
+ VerifyFilterIds(options.runtime_filter_ids_to_skip, {1, 6});
+ }
+ {
+ TQueryOptions options;
+ EXPECT_TRUE(SetQueryOption(KEY, "0, 1", &options, nullptr).ok());
+ VerifyFilterIds(options.runtime_filter_ids_to_skip, {0, 1});
+ }
+ {
+ TQueryOptions options;
+ EXPECT_TRUE(SetQueryOption(KEY, "0,,1", &options, nullptr).ok());
+ VerifyFilterIds(options.runtime_filter_ids_to_skip, {0, 1});
+ }
+ {
+ TQueryOptions options;
+ EXPECT_TRUE(SetQueryOption(KEY, "0, 1, , 2", &options, nullptr).ok());
+ VerifyFilterIds(options.runtime_filter_ids_to_skip, {0, 1, 2});
+ }
+ {
+ TQueryOptions options;
+ EXPECT_FALSE(SetQueryOption(KEY, "1,b", &options, nullptr).ok());
+ }
+ {
+ TQueryOptions options;
+ EXPECT_FALSE(SetQueryOption(KEY, "1,4294967295", &options, nullptr).ok());
+ }
+}
+
// Tests for setting of MAX_RESULT_SPOOLING_MEM and
// MAX_SPILLED_RESULT_SPOOLING_MEM. Setting of these options must maintain the
// condition 'MAX_RESULT_SPOOLING_MEM <= MAX_SPILLED_RESULT_SPOOLING_MEM'.
diff --git a/be/src/service/query-options.cc b/be/src/service/query-options.cc
index dae5e36fb..d04aab5dc 100644
--- a/be/src/service/query-options.cc
+++ b/be/src/service/query-options.cc
@@ -20,6 +20,7 @@
#include <limits>
#include <regex>
#include <sstream>
+#include <string>
#include <boost/algorithm/string.hpp>
#include <gutil/strings/strip.h>
@@ -102,6 +103,22 @@ const string PrintQueryOptionValue(
return val.str();
}
+std::ostream& impala::operator<<(std::ostream& out, const std::set<int32_t>&
filter_ids) {
+ bool first = true;
+ for (const auto& t : filter_ids) {
+ if (!first) out << ",";
+ out << t;
+ first = false;
+ }
+ return out;
+}
+
+const string PrintQueryOptionValue(const std::set<int32_t>& filter_ids) {
+ std::stringstream val;
+ val << filter_ids;
+ return val.str();
+}
+
void impala::TQueryOptionsToMap(
const TQueryOptions& query_options, std::map<string, string>*
configuration) {
#define QUERY_OPT_FN(NAME, ENUM, LEVEL) \
@@ -162,6 +179,18 @@ string impala::DebugQueryOptions(const TQueryOptions&
query_options) {
return ss.str();
}
+inline void TrimAndRemoveEmptyString(vector<string>& values) {
+ int i = 0;
+ while (i < values.size()) {
+ trim(values[i]);
+ if (values[i].length() == 0) {
+ values.erase(values.begin() + i);
+ } else {
+ i++;
+ }
+ }
+}
+
// Returns the TImpalaQueryOptions enum for the given "key". Input is case
insensitive.
// Return -1 if the input is an invalid option.
static int GetQueryOptionForKey(const string& key) {
@@ -858,6 +887,7 @@ Status impala::SetQueryOption(const string& key, const
string& value,
// Parse and verify the enabled runtime filter types.
vector<string> str_types;
split(str_types, filter_value, is_any_of(","), token_compress_on);
+ TrimAndRemoveEmptyString(str_types);
for (const auto& t : str_types) {
TRuntimeFilterType::type filter_type;
RETURN_IF_ERROR(GetThriftEnum(t, "runtime filter type",
@@ -1229,6 +1259,25 @@ Status impala::SetQueryOption(const string& key, const
string& value,
query_options->__set_iceberg_disable_count_star_optimization(IsTrue(value));
break;
}
+ case TImpalaQueryOptions::RUNTIME_FILTER_IDS_TO_SKIP: {
+ std::set<int32_t> filter_ids;
+ // This does quote handling similar as ENABLED_RUNTIME_FILTER_TYPES
option.
+ const string filter_value = std::regex_replace(value,
std::regex("^\"|\"$"), "");
+ vector<string> str_ids;
+ split(str_ids, filter_value, is_any_of(","), token_compress_on);
+ TrimAndRemoveEmptyString(str_ids);
+ for (const auto& t : str_ids) {
+ try {
+ int32_t filter_id = std::stoi(t);
+ filter_ids.insert(filter_id);
+ } catch (std::exception&) {
+ return Status::Expected(
+ "RUNTIME_FILTER_IDS_TO_SKIP is not a valid comma separated
integers.");
+ }
+ }
+ query_options->__set_runtime_filter_ids_to_skip(filter_ids);
+ break;
+ }
default:
if (IsRemovedQueryOption(key)) {
LOG(WARNING) << "Ignoring attempt to set removed query option '" <<
key << "'";
@@ -1252,7 +1301,20 @@ Status impala::ParseQueryOptions(const string& options,
TQueryOptions* query_opt
QueryOptionsMask* set_query_options_mask) {
if (options.length() == 0) return Status::OK();
vector<string> kv_pairs;
- split(kv_pairs, options, is_any_of(","), token_compress_on);
+ int double_quote_ct = 0;
+ int begin = 0;
+ int end = 0;
+ while (end < options.length()) {
+ if (options.at(end) == '"') {
+ double_quote_ct = (double_quote_ct + 1) % 2;
+ } else if (options.at(end) == ',' && double_quote_ct == 0) {
+ // Found comma that is not within two double quotes. This is an option
separator.
+ if (begin < end) kv_pairs.push_back(options.substr(begin, end - begin));
+ begin = end + 1;
+ }
+ end++;
+ }
+ if (begin < end) kv_pairs.push_back(options.substr(begin, end - begin));
// Construct an error status which is used to aggregate errors encountered
during
// parsing. It is only returned if the number of error details is greater
than 0.
Status errorStatus = Status::Expected("Errors parsing query options");
diff --git a/be/src/service/query-options.h b/be/src/service/query-options.h
index 3d5a752b4..0738a10b7 100644
--- a/be/src/service/query-options.h
+++ b/be/src/service/query-options.h
@@ -34,6 +34,8 @@ class TQueryOptions;
std::ostream& operator<<(std::ostream& out,
const std::set<impala::TRuntimeFilterType::type>& filter_types);
+std::ostream& operator<<(std::ostream& out, const std::set<int32_t>&
filter_ids);
+
// Maps query option names to option levels used for displaying the query
// options via SET and SET ALL
typedef std::unordered_map<string, beeswax::TQueryOptionLevel::type>
@@ -50,7 +52,7 @@ typedef std::unordered_map<string,
beeswax::TQueryOptionLevel::type>
// time we add or remove a query option to/from the enum TImpalaQueryOptions.
#define QUERY_OPTS_TABLE
\
DCHECK_EQ(_TImpalaQueryOptions_VALUES_TO_NAMES.size(),
\
- TImpalaQueryOptions::ICEBERG_DISABLE_COUNT_STAR_OPTIMIZATION + 1);
\
+ TImpalaQueryOptions::RUNTIME_FILTER_IDS_TO_SKIP + 1);
\
REMOVED_QUERY_OPT_FN(abort_on_default_limit_exceeded,
ABORT_ON_DEFAULT_LIMIT_EXCEEDED) \
QUERY_OPT_FN(abort_on_error, ABORT_ON_ERROR, TQueryOptionLevel::REGULAR)
\
REMOVED_QUERY_OPT_FN(allow_unsupported_formats, ALLOW_UNSUPPORTED_FORMATS)
\
@@ -326,6 +328,8 @@ typedef std::unordered_map<string,
beeswax::TQueryOptionLevel::type>
QUERY_OPT_FN(enable_tuple_cache, ENABLE_TUPLE_CACHE,
TQueryOptionLevel::ADVANCED) \
QUERY_OPT_FN(iceberg_disable_count_star_optimization,
\
ICEBERG_DISABLE_COUNT_STAR_OPTIMIZATION, TQueryOptionLevel::ADVANCED)
\
+ QUERY_OPT_FN(runtime_filter_ids_to_skip,
\
+ RUNTIME_FILTER_IDS_TO_SKIP, TQueryOptionLevel::DEVELOPMENT)
\
;
/// Enforce practical limits on some query options to avoid undesired query
state.
@@ -375,7 +379,8 @@ Status ValidateQueryOptions(TQueryOptions* query_options);
/// Parse a "," separated key=value pair of query options and set it in
'query_options'.
/// If the same query option is specified more than once, the last one wins.
The
/// set_query_options_mask bitmask is updated to reflect the query options
which were
-/// set. Returns an error status containing an error detail for any invalid
options (e.g.
+/// set. Double quote can be used to wrap a query option value that has ","
char in it.
+/// Returns an error status containing an error detail for any invalid options
(e.g.
/// bad format or invalid query option), but all valid query options are still
handled.
Status ParseQueryOptions(const std::string& options, TQueryOptions*
query_options,
QueryOptionsMask* set_query_options_mask);
diff --git a/common/thrift/ImpalaService.thrift
b/common/thrift/ImpalaService.thrift
index 6dc9efbc1..61f5dbd0a 100644
--- a/common/thrift/ImpalaService.thrift
+++ b/common/thrift/ImpalaService.thrift
@@ -924,6 +924,12 @@ enum TImpalaQueryOptions {
// Disables statistic-based count(*)-optimization for Iceberg tables.
ICEBERG_DISABLE_COUNT_STAR_OPTIMIZATION = 175
+
+ // List of runtime filter id to skip if it exists in query plan.
+ // If using JDBC client, use double quote to wrap multiple ids, like:
+ // RUNTIME_FILTER_IDS_TO_SKIP="1,2,3"
+ // If using impala-shell client, double quote is not required.
+ RUNTIME_FILTER_IDS_TO_SKIP = 176
}
// The summary of a DML statement.
diff --git a/common/thrift/Query.thrift b/common/thrift/Query.thrift
index 57f322e7d..01ff6ebbc 100644
--- a/common/thrift/Query.thrift
+++ b/common/thrift/Query.thrift
@@ -702,6 +702,9 @@ struct TQueryOptions {
// See comment in ImpalaService.thrift
176: optional bool iceberg_disable_count_star_optimization = false;
+
+ // See comment in ImpalaService.thrift
+ 177: optional set<i32> runtime_filter_ids_to_skip
}
// Impala currently has three types of sessions: Beeswax, HiveServer2 and
external
diff --git
a/fe/src/main/java/org/apache/impala/planner/RuntimeFilterGenerator.java
b/fe/src/main/java/org/apache/impala/planner/RuntimeFilterGenerator.java
index 0e626743d..c908e17d3 100644
--- a/fe/src/main/java/org/apache/impala/planner/RuntimeFilterGenerator.java
+++ b/fe/src/main/java/org/apache/impala/planner/RuntimeFilterGenerator.java
@@ -888,7 +888,8 @@ public final class RuntimeFilterGenerator {
RuntimeFilterGenerator filterGenerator = new RuntimeFilterGenerator(
ctx.getQueryOptions());
filterGenerator.generateFilters(ctx, plan);
- List<RuntimeFilter> filters =
Lists.newArrayList(filterGenerator.getRuntimeFilters());
+ List<RuntimeFilter> filters =
+ Lists.newArrayList(filterGenerator.getRuntimeFilters(ctx));
if (filters.size() > maxNumBloomFilters) {
// If more than 'maxNumBloomFilters' were generated, sort them by
increasing
// selectivity and keep the 'maxNumBloomFilters' most selective bloom
filters.
@@ -1035,10 +1036,18 @@ public final class RuntimeFilterGenerator {
/**
* Returns a list of all the registered runtime filters, ordered by filter
ID.
*/
- public List<RuntimeFilter> getRuntimeFilters() {
+ public List<RuntimeFilter> getRuntimeFilters(PlannerContext ctx) {
Set<RuntimeFilter> resultSet = new HashSet<>();
for (List<RuntimeFilter> filters: runtimeFiltersByTid_.values()) {
- resultSet.addAll(filters);
+ for (RuntimeFilter filter : filters) {
+ if (ctx.getQueryOptions().isSetRuntime_filter_ids_to_skip()
+ && ctx.getQueryOptions().runtime_filter_ids_to_skip.contains(
+ filter.getFilterId().asInt())) {
+ // Skip this filter because it is explicitly excluded via query
option.
+ continue;
+ }
+ resultSet.add(filter);
+ }
}
List<RuntimeFilter> resultList = Lists.newArrayList(resultSet);
Collections.sort(resultList, new Comparator<RuntimeFilter>() {
diff --git a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
index a64c75be1..1553be896 100644
--- a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
+++ b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
@@ -33,7 +33,6 @@ import org.apache.impala.catalog.HBaseColumn;
import org.apache.impala.catalog.Type;
import org.apache.impala.common.ImpalaException;
import org.apache.impala.datagenerator.HBaseTestDataRegionAssignment;
-import org.apache.impala.planner.IcebergScanPlanner;
import org.apache.impala.service.BackendConfig;
import org.apache.impala.service.Frontend.PlanCtx;
import org.apache.impala.testutil.TestUtils;
diff --git
a/testdata/workloads/functional-planner/queries/PlannerTest/runtime-filter-query-options.test
b/testdata/workloads/functional-planner/queries/PlannerTest/runtime-filter-query-options.test
index fa0291438..395d5edd5 100644
---
a/testdata/workloads/functional-planner/queries/PlannerTest/runtime-filter-query-options.test
+++
b/testdata/workloads/functional-planner/queries/PlannerTest/runtime-filter-query-options.test
@@ -642,6 +642,106 @@ PLAN-ROOT SINK
HDFS partitions=24/24 files=24 size=478.45KB
row-size=37B cardinality=7.30K
====
+# RUNTIME_FILTER_IDS_TO_SKIP set to all even filter ids.
+select /* +straight_join */ count(*) from functional.alltypes a
+ join /* +broadcast */ functional.alltypes b on b.id = a.id and
+ b.date_string_col = a.date_string_col
+ join /* +broadcast */ functional.alltypes c on c.month = a.month and
+ c.int_col = a.int_col
+ join /* +shuffle */ functional.alltypes d on d.bool_col = a.bool_col and
d.year = a.year;
+---- QUERYOPTIONS
+RUNTIME_FILTER_IDS_TO_SKIP="0,4,8"
+---- PLAN
+PLAN-ROOT SINK
+|
+07:AGGREGATE [FINALIZE]
+| output: count(*)
+| row-size=8B cardinality=1
+|
+06:HASH JOIN [INNER JOIN]
+| hash predicates: a.`year` = d.`year`, a.bool_col = d.bool_col
+| runtime filters: RF001 <- d.bool_col
+| row-size=74B cardinality=16.21G
+|
+|--03:SCAN HDFS [functional.alltypes d]
+| HDFS partitions=24/24 files=24 size=478.45KB
+| row-size=5B cardinality=7.30K
+|
+05:HASH JOIN [INNER JOIN]
+| hash predicates: a.`month` = c.`month`, a.int_col = c.int_col
+| runtime filters: RF005 <- c.int_col
+| row-size=69B cardinality=4.44M
+|
+|--02:SCAN HDFS [functional.alltypes c]
+| HDFS partitions=24/24 files=24 size=478.45KB
+| row-size=8B cardinality=7.30K
+|
+04:HASH JOIN [INNER JOIN]
+| hash predicates: a.id = b.id, a.date_string_col = b.date_string_col
+| runtime filters: RF009 <- b.date_string_col
+| row-size=61B cardinality=7.30K
+|
+|--01:SCAN HDFS [functional.alltypes b]
+| HDFS partitions=24/24 files=24 size=478.45KB
+| row-size=24B cardinality=7.30K
+|
+00:SCAN HDFS [functional.alltypes a]
+ HDFS partitions=24/24 files=24 size=478.45KB
+ runtime filters: RF001 -> a.bool_col, RF005 -> a.int_col, RF009 ->
a.date_string_col
+ row-size=37B cardinality=7.30K
+---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
+13:AGGREGATE [FINALIZE]
+| output: count:merge(*)
+| row-size=8B cardinality=1
+|
+12:EXCHANGE [UNPARTITIONED]
+|
+07:AGGREGATE
+| output: count(*)
+| row-size=8B cardinality=1
+|
+06:HASH JOIN [INNER JOIN, PARTITIONED]
+| hash predicates: a.`year` = d.`year`, a.bool_col = d.bool_col
+| runtime filters: RF001 <- d.bool_col
+| row-size=74B cardinality=16.21G
+|
+|--11:EXCHANGE [HASH(d.`year`,d.bool_col)]
+| |
+| 03:SCAN HDFS [functional.alltypes d]
+| HDFS partitions=24/24 files=24 size=478.45KB
+| row-size=5B cardinality=7.30K
+|
+10:EXCHANGE [HASH(a.`year`,a.bool_col)]
+|
+05:HASH JOIN [INNER JOIN, BROADCAST]
+| hash predicates: a.`month` = c.`month`, a.int_col = c.int_col
+| runtime filters: RF005 <- c.int_col
+| row-size=69B cardinality=4.44M
+|
+|--09:EXCHANGE [BROADCAST]
+| |
+| 02:SCAN HDFS [functional.alltypes c]
+| HDFS partitions=24/24 files=24 size=478.45KB
+| row-size=8B cardinality=7.30K
+|
+04:HASH JOIN [INNER JOIN, BROADCAST]
+| hash predicates: a.id = b.id, a.date_string_col = b.date_string_col
+| runtime filters: RF009 <- b.date_string_col
+| row-size=61B cardinality=7.30K
+|
+|--08:EXCHANGE [BROADCAST]
+| |
+| 01:SCAN HDFS [functional.alltypes b]
+| HDFS partitions=24/24 files=24 size=478.45KB
+| row-size=24B cardinality=7.30K
+|
+00:SCAN HDFS [functional.alltypes a]
+ HDFS partitions=24/24 files=24 size=478.45KB
+ runtime filters: RF001 -> a.bool_col, RF005 -> a.int_col, RF009 ->
a.date_string_col
+ row-size=37B cardinality=7.30K
+====
# DISABLE_ROW_RUNTIME_FILTERING completely disables filters for Kudu.
select /* +straight_join */ count(*) from functional_kudu.alltypes a
join functional_kudu.alltypes b on a.id = b.id
@@ -853,6 +953,78 @@ Per-Host Resources: mem-estimate=33.96MB
mem-reservation=1.96MB thread-reservati
tuple-ids=0 row-size=4B cardinality=3.43K
in pipelines: 00(GETNEXT)
====
+# Test that ENABLED_RUNTIME_FILTER_TYPES accept both BLOOM and IN_LIST.
+# IN-list filter is assigned to ORC.
+select /* +straight_join */ count(*) from functional_orc_def.alltypes a
+ join /* +broadcast */ functional_orc_def.alltypestiny b on a.id = b.id
+---- QUERYOPTIONS
+ENABLED_RUNTIME_FILTER_TYPES="BLOOM,IN_LIST"
+DISABLE_ROW_RUNTIME_FILTERING=false
+EXPLAIN_LEVEL=2
+---- DISTRIBUTEDPLAN
+F02:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
+| Per-Host Resources: mem-estimate=4.02MB mem-reservation=4.00MB
thread-reservation=1
+PLAN-ROOT SINK
+| output exprs: count(*)
+| mem-estimate=0B mem-reservation=0B thread-reservation=0
+|
+06:AGGREGATE [FINALIZE]
+| output: count:merge(*)
+| mem-estimate=16.00KB mem-reservation=0B spill-buffer=2.00MB
thread-reservation=0
+| tuple-ids=2 row-size=8B cardinality=1
+| in pipelines: 06(GETNEXT), 03(OPEN)
+|
+05:EXCHANGE [UNPARTITIONED]
+| mem-estimate=16.00KB mem-reservation=0B thread-reservation=0
+| tuple-ids=2 row-size=8B cardinality=1
+| in pipelines: 03(GETNEXT)
+|
+F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=3
+Per-Host Resources: mem-estimate=34.96MB mem-reservation=2.96MB
thread-reservation=2 runtime-filters-memory=1.00MB
+03:AGGREGATE
+| output: count(*)
+| mem-estimate=16.00KB mem-reservation=0B spill-buffer=2.00MB
thread-reservation=0
+| tuple-ids=2 row-size=8B cardinality=1
+| in pipelines: 03(GETNEXT), 00(OPEN)
+|
+02:HASH JOIN [INNER JOIN, BROADCAST]
+| hash predicates: a.id = b.id
+| fk/pk conjuncts: assumed fk/pk
+| runtime filters: RF000[bloom] <- b.id, RF001[in_list] <- b.id
+| mem-estimate=1.94MB mem-reservation=1.94MB spill-buffer=64.00KB
thread-reservation=0
+| tuple-ids=0,1 row-size=8B cardinality=3.43K
+| in pipelines: 00(GETNEXT), 01(OPEN)
+|
+|--04:EXCHANGE [BROADCAST]
+| | mem-estimate=16.00KB mem-reservation=0B thread-reservation=0
+| | tuple-ids=1 row-size=4B cardinality=353
+| | in pipelines: 01(GETNEXT)
+| |
+| F01:PLAN FRAGMENT [RANDOM] hosts=3 instances=3
+| Per-Host Resources: mem-estimate=32.03MB mem-reservation=16.00KB
thread-reservation=2
+| 01:SCAN HDFS [functional_orc_def.alltypestiny b, RANDOM]
+| HDFS partitions=4/4 files=4 size=5.55KB
+| stored statistics:
+| table: rows=unavailable size=unavailable
+| partitions: 0/4 rows=353
+| columns: unavailable
+| extrapolated-rows=disabled max-scan-range-rows=unavailable
+| mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1
+| tuple-ids=1 row-size=4B cardinality=353
+| in pipelines: 01(GETNEXT)
+|
+00:SCAN HDFS [functional_orc_def.alltypes a, RANDOM]
+ HDFS partitions=24/24 files=24 size=53.97KB
+ runtime filters: RF000[bloom] -> a.id, RF001[in_list] -> a.id
+ stored statistics:
+ table: rows=unavailable size=unavailable
+ partitions: 0/24 rows=3.43K
+ columns: unavailable
+ extrapolated-rows=disabled max-scan-range-rows=unavailable
+ mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1
+ tuple-ids=0 row-size=4B cardinality=3.43K
+ in pipelines: 00(GETNEXT)
+====
# ENABLED_RUNTIME_FILTER_TYPES is set as IN_LIST. Make sure no IN-list filters
# is generated for partitioned join.
select /* +straight_join */ count(*) from functional_orc_def.alltypes a