This is an automated email from the ASF dual-hosted git repository. wzhou pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 27b45ad32ab72af266be8b7d28aa9761c85e5219 Author: Eyizoha <[email protected]> AuthorDate: Thu Oct 26 16:11:52 2023 +0800 IMPALA-12508: Update query option "enabled_runtime_filter_types" comments The query option "enabled_runtime_filter_types" currently allows specifying a comma-separated list of runtime filter types (including BLOOM, MIN_MAX and IN_LIST) to enable or setting it to "all" to use all types. But the comment for this query option does not mention IN_LIST and states that it only applies to Kudu, which is outdated, as it also affects HDFS scans after IMPALA-10325. Therefore, this patch updates the relevant comments to avoid any confusion caused by the outdated comments. Change-Id: Iecff7c655e273712d851d9ce94ef5713e579aa72 Reviewed-on: http://gerrit.cloudera.org:8080/20627 Reviewed-by: Impala Public Jenkins <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- common/thrift/ImpalaService.thrift | 14 +++++++++++--- .../org/apache/impala/planner/RuntimeFilterGenerator.java | 3 ++- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/common/thrift/ImpalaService.thrift b/common/thrift/ImpalaService.thrift index 83e37f629..cb1a3f1b3 100644 --- a/common/thrift/ImpalaService.thrift +++ b/common/thrift/ImpalaService.thrift @@ -532,11 +532,19 @@ enum TImpalaQueryOptions { RETRY_FAILED_QUERIES = 102 // Enabled runtime filter types to be applied to scanner. - // This option only apply to Kudu now, will apply to HDFS once we support - // min-max filter for HDFS. + // This option only apply to Hdfs scan node and Kudu scan node. + // Specify the enabled types by a comma-separated list or enable all types by "ALL". // BLOOM - apply bloom filter only, // MIN_MAX - apply min-max filter only. - // ALL - apply both bloom filter and min-max filter (default). + // IN_LIST - apply in-list filter only. + // ALL - apply all types of runtime filters. + // Default is [BLOOM, MIN_MAX]. 
+ // Depending on the scan node type, Planner can schedule compatible runtime filter type + // as follows: + // Kudu scan: BLOOM, MIN_MAX + // Hdfs scan on Parquet file: BLOOM, MIN_MAX + // Hdfs scan on ORC file: BLOOM, IN_LIST + // Hdfs scan on other kind of file: BLOOM ENABLED_RUNTIME_FILTER_TYPES = 103 // Enable asynchronous codegen. diff --git a/fe/src/main/java/org/apache/impala/planner/RuntimeFilterGenerator.java b/fe/src/main/java/org/apache/impala/planner/RuntimeFilterGenerator.java index 5034a61ad..fc69cdc06 100644 --- a/fe/src/main/java/org/apache/impala/planner/RuntimeFilterGenerator.java +++ b/fe/src/main/java/org/apache/impala/planner/RuntimeFilterGenerator.java @@ -1087,7 +1087,8 @@ public final class RuntimeFilterGenerator { * scan node. * 3. Only Hdfs and Kudu scan nodes are supported: * a. If the target is an HdfsScanNode, the filter must be type BLOOM/IN_LIST for - * non Parquet tables, or type BLOOM/MIN_MAX/IN_LIST for Parquet tables. + * ORC tables, or type BLOOM/MIN_MAX for Parquet tables, or type BLOOM for other + * kind of tables. * b. If the target is a KuduScanNode, the filter could be type MIN_MAX, and/or * BLOOM, the target must be a slot ref on a column, and the comp op cannot * be 'not distinct'.
