(doris) branch master updated: [fix](nereids)the column name should be case insensitive in tablet prune (#44064)

dataroaring Mon, 18 Nov 2024 02:05:42 -0800

This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git



The following commit(s) were added to refs/heads/master by this push:
     new f74610c3be1 [fix](nereids)the column name should be case insensitive 
in tablet prune (#44064)
f74610c3be1 is described below

commit f74610c3be14d2032f6a6d30a9d88c9c813f7262
Author: starocean999 <li...@selectdb.com>
AuthorDate: Mon Nov 18 18:03:55 2024 +0800

    [fix](nereids)the column name should be case insensitive in tablet prune 
(#44064)
    
    ```
    CREATE TABLE `t_customers_wide_index` (   `CUSTOMER_ID` int NULL,   
`ADDRESS` varchar(1500) NULL)DUPLICATE KEY(`CUSTOMER_ID`) DISTRIBUTED BY 
HASH(`CUSTOMER_ID`) BUCKETS 32;
    insert into t_customers_wide_index values (1, "111");
    
    explain SELECT * from t_customers_wide_index WHERE customer_id = 1817422;
    ```
    Before this PR, tablet pruning does not work:
    `tablets=32/32, tabletList=1451717,1451719,1451721 ...`
    After this PR, the unused tablets are pruned, as shown below:
    `tablets=1/32, tabletList=1451767 `
---
 .../nereids/rules/rewrite/PruneOlapScanTablet.java |  4 +--
 .../java/org/apache/doris/planner/ScanNode.java    |  3 +-
 .../doris/planner/HashDistributionPrunerTest.java  | 24 ++++++++--------
 .../org/apache/doris/planner/OlapScanNodeTest.java |  5 ++--
 .../tablet_prune/test_tablet_prune.groovy          | 32 ++++++++++++++++++++++
 5 files changed, 51 insertions(+), 17 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PruneOlapScanTablet.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PruneOlapScanTablet.java
index 8cf4f91deec..4b86b7a3706 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PruneOlapScanTablet.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PruneOlapScanTablet.java
@@ -34,7 +34,7 @@ import org.apache.doris.planner.PartitionColumnFilter;
 
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableList.Builder;
-import com.google.common.collect.Maps;
+import org.apache.commons.collections.map.CaseInsensitiveMap;
 
 import java.util.Collection;
 import java.util.HashSet;
@@ -80,7 +80,7 @@ public class PruneOlapScanTablet extends 
OneRewriteRuleFactory {
             return index.getTabletIdsInOrder();
         }
         HashDistributionInfo hashInfo = (HashDistributionInfo) info;
-        Map<String, PartitionColumnFilter> filterMap = Maps.newHashMap();
+        Map<String, PartitionColumnFilter> filterMap = new 
CaseInsensitiveMap();
         
expressions.stream().map(ExpressionUtils::checkAndMaybeCommute).filter(Optional::isPresent)
                 .forEach(expr -> new 
ExpressionColumnFilterConverter(filterMap).convert(expr.get()));
         return new HashDistributionPruner(index.getTabletIdsInOrder(),
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/ScanNode.java 
b/fe/fe-core/src/main/java/org/apache/doris/planner/ScanNode.java
index b392075be9b..53aa0d5b24d 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/ScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/ScanNode.java
@@ -68,6 +68,7 @@ import com.google.common.collect.Range;
 import com.google.common.collect.RangeSet;
 import com.google.common.collect.Sets;
 import com.google.common.collect.TreeRangeSet;
+import org.apache.commons.collections.map.CaseInsensitiveMap;
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
 
@@ -89,7 +90,7 @@ public abstract class ScanNode extends PlanNode implements 
SplitGenerator {
     protected static final int NUM_SPLITTERS_ON_FLIGHT = 
Config.max_external_cache_loader_thread_pool_size;
     protected final TupleDescriptor desc;
     // for distribution prunner
-    protected Map<String, PartitionColumnFilter> columnFilters = 
Maps.newHashMap();
+    protected Map<String, PartitionColumnFilter> columnFilters = new 
CaseInsensitiveMap();
     // Use this if partition_prune_algorithm_version is 2.
     protected Map<String, ColumnRange> columnNameToRange = Maps.newHashMap();
     protected String sortColumn = null;
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/planner/HashDistributionPrunerTest.java
 
b/fe/fe-core/src/test/java/org/apache/doris/planner/HashDistributionPrunerTest.java
index 2da47314d68..e2896ee2390 100644
--- 
a/fe/fe-core/src/test/java/org/apache/doris/planner/HashDistributionPrunerTest.java
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/planner/HashDistributionPrunerTest.java
@@ -26,8 +26,8 @@ import org.apache.doris.catalog.PartitionKey;
 import org.apache.doris.catalog.PrimitiveType;
 
 import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
 import com.google.common.collect.Sets;
+import org.apache.commons.collections.map.CaseInsensitiveMap;
 import org.junit.Assert;
 import org.junit.Test;
 
@@ -84,12 +84,12 @@ public class HashDistributionPrunerTest {
         inList4.add(new StringLiteral("2"));
         shopTypeFilter.setInPredicate(new InPredicate(new SlotRef(null, 
"shop_type"), inList4, false));
 
-        Map<String, PartitionColumnFilter> filters = Maps.newHashMap();
-        filters.put("dealDate", dealDatefilter);
-        filters.put("main_brand_id", mainBrandFilter);
-        filters.put("item_third_cate_id", itemThirdFilter);
-        filters.put("channel", channelFilter);
-        filters.put("shop_type", shopTypeFilter);
+        Map<String, PartitionColumnFilter> filters = new CaseInsensitiveMap();
+        filters.put("DEALDATE", dealDatefilter);
+        filters.put("MAIN_BRAND_ID", mainBrandFilter);
+        filters.put("ITEM_THIRD_CATE_ID", itemThirdFilter);
+        filters.put("CHANNEL", channelFilter);
+        filters.put("SHOP_TYPE", shopTypeFilter);
 
         HashDistributionPruner pruner = new HashDistributionPruner(tabletIds, 
columns, filters, tabletIds.size(), true);
 
@@ -97,16 +97,16 @@ public class HashDistributionPrunerTest {
         // 20 = 1 * 5 * 2 * 2 * 1 (element num of each filter)
         Assert.assertEquals(20, results.size());
 
-        filters.get("shop_type").getInPredicate().addChild(new 
StringLiteral("4"));
+        filters.get("SHOP_TYPE").getInPredicate().addChild(new 
StringLiteral("4"));
         results = pruner.prune();
         // 40 = 1 * 5 * 2 * 2 * 2 (element num of each filter)
         // 39 is because these is hash conflict
         Assert.assertEquals(39, results.size());
 
-        filters.get("shop_type").getInPredicate().addChild(new 
StringLiteral("5"));
-        filters.get("shop_type").getInPredicate().addChild(new 
StringLiteral("6"));
-        filters.get("shop_type").getInPredicate().addChild(new 
StringLiteral("7"));
-        filters.get("shop_type").getInPredicate().addChild(new 
StringLiteral("8"));
+        filters.get("SHOP_TYPE").getInPredicate().addChild(new 
StringLiteral("5"));
+        filters.get("SHOP_TYPE").getInPredicate().addChild(new 
StringLiteral("6"));
+        filters.get("SHOP_TYPE").getInPredicate().addChild(new 
StringLiteral("7"));
+        filters.get("SHOP_TYPE").getInPredicate().addChild(new 
StringLiteral("8"));
         results = pruner.prune();
         // 120 = 1 * 5 * 2 * 2 * 6 (element num of each filter) > 100
         Assert.assertEquals(300, results.size());
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/planner/OlapScanNodeTest.java 
b/fe/fe-core/src/test/java/org/apache/doris/planner/OlapScanNodeTest.java
index d8d78543558..417cf8b55d0 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/planner/OlapScanNodeTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/planner/OlapScanNodeTest.java
@@ -31,6 +31,7 @@ import org.apache.doris.qe.GlobalVariable;
 
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
+import org.apache.commons.collections.map.CaseInsensitiveMap;
 import org.junit.Assert;
 import org.junit.Test;
 
@@ -62,8 +63,8 @@ public class OlapScanNodeTest {
 
         PartitionColumnFilter  columnFilter = new PartitionColumnFilter();
         columnFilter.setInPredicate(inPredicate);
-        Map<String, PartitionColumnFilter> filterMap = Maps.newHashMap();
-        filterMap.put("columnA", columnFilter);
+        Map<String, PartitionColumnFilter> filterMap = new 
CaseInsensitiveMap();
+        filterMap.put("COLUMNA", columnFilter);
 
         DistributionPruner partitionPruner  = new HashDistributionPruner(
                 partitions,
diff --git 
a/regression-test/suites/nereids_rules_p0/tablet_prune/test_tablet_prune.groovy 
b/regression-test/suites/nereids_rules_p0/tablet_prune/test_tablet_prune.groovy
new file mode 100644
index 00000000000..a87571a7340
--- /dev/null
+++ 
b/regression-test/suites/nereids_rules_p0/tablet_prune/test_tablet_prune.groovy
@@ -0,0 +1,32 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_tablet_prune") {
+    sql "SET enable_nereids_planner=true"
+    sql "SET enable_fallback_to_original_planner=false"
+
+    sql "drop table if exists t_customers_wide_index"
+    sql """
+    CREATE TABLE `t_customers_wide_index` (   `CUSTOMER_ID` int NULL,   
`ADDRESS` varchar(1500) NULL) ENGINE=OLAP UNIQUE KEY(`CUSTOMER_ID`) DISTRIBUTED 
BY HASH(`CUSTOMER_ID`) BUCKETS 32 PROPERTIES ( "file_cache_ttl_seconds" = "0", 
"is_being_synced" = "false", "storage_medium" = "hdd", "storage_format" = "V2", 
"inverted_index_storage_format" = "V2", "enable_unique_key_merge_on_write" = 
"true", "light_schema_change" = "true", "store_row_column" = "true", 
"row_store_page_size" = "16384", "d [...]
+    sql """
+    insert into t_customers_wide_index values (1, "111");
+    """
+    explain {
+        sql("SELECT * from t_customers_wide_index WHERE customer_id = 
1817422;")
+        contains "tablets=1/32"
+    }
+}
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

(doris) branch master updated: [fix](nereids)the column name should be case insensitive in tablet prune (#44064)

Reply via email to