This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new dd1bd6d8f1 [Fix](multi catalog)Support hive default partition. (#17179)
dd1bd6d8f1 is described below

commit dd1bd6d8f17466e9390b5d5fecc2d6d90b4a6b3e
Author: Jibing-Li <[email protected]>
AuthorDate: Tue Feb 28 00:08:29 2023 +0800

    [Fix](multi catalog)Support hive default partition. (#17179)
    
    Hive stores all rows whose partition column values are null in a
    default partition named __HIVE_DEFAULT_PARTITION__.
    Doris fails to read this partition when the partition column type is INT
    or any other type that the string __HIVE_DEFAULT_PARTITION__ cannot be
    converted to.
    This PR supports the Hive default partition by setting the column value
    to NULL for the missing partition columns.
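    
    As a quick illustration, using the one_partition table from the
    regression test added in this commit (catalog and database setup
    omitted; this is a sketch, not part of the patch itself), a predicate
    on the INT partition column now returns the rows stored under
    __HIVE_DEFAULT_PARTITION__ with a NULL partition value instead of
    failing:
    
        select id, part1 from one_partition where part1 is null order by id;
        -- expected result after this fix:
        -- 5    \N
        -- 6    \N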
---
 .../org/apache/doris/analysis/PartitionValue.java  |  13 ++
 .../org/apache/doris/common/util/BrokerUtil.java   |   5 +-
 .../doris/datasource/hive/HiveMetaStoreCache.java  |   3 +-
 .../doris/planner/ListPartitionPrunerV2.java       |  18 +++
 .../doris/planner/PartitionPrunerV2Base.java       |  14 +++
 .../doris/planner/RangePartitionPrunerV2.java      |   5 +
 .../doris/planner/external/HiveScanProvider.java   |   3 +-
 .../hive/test_hive_default_partition.out           | 135 +++++++++++++++++++++
 .../hive/test_hive_default_partition.groovy        |  85 +++++++++++++
 9 files changed, 278 insertions(+), 3 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/PartitionValue.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/PartitionValue.java
index f61c4be669..b273b798a3 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/PartitionValue.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/PartitionValue.java
@@ -24,6 +24,7 @@ public class PartitionValue {
     public static final PartitionValue MAX_VALUE = new PartitionValue();
 
     private String value;
+    private boolean isHiveDefaultPartition;
 
     private PartitionValue() {
 
@@ -33,7 +34,15 @@ public class PartitionValue {
         this.value = value;
     }
 
+    public PartitionValue(String value, boolean isHiveDefaultPartition) {
+        this.value = value;
+        this.isHiveDefaultPartition = isHiveDefaultPartition;
+    }
+
     public LiteralExpr getValue(Type type) throws AnalysisException {
+        if (isHiveDefaultPartition) {
+            return new StringLiteral(value);
+        }
         if (isMax()) {
             return LiteralExpr.createInfinity(type, true);
         } else {
@@ -52,4 +61,8 @@ public class PartitionValue {
             return value;
         }
     }
+
+    public boolean isHiveDefaultPartition() {
+        return isHiveDefaultPartition;
+    }
 }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/util/BrokerUtil.java b/fe/fe-core/src/main/java/org/apache/doris/common/util/BrokerUtil.java
index f11c5fe29d..b41c16e6ba 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/common/util/BrokerUtil.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/BrokerUtil.java
@@ -26,8 +26,10 @@ import org.apache.doris.catalog.FsBroker;
 import org.apache.doris.common.AnalysisException;
 import org.apache.doris.common.ClientPool;
 import org.apache.doris.common.Config;
+import org.apache.doris.common.FeConstants;
 import org.apache.doris.common.Pair;
 import org.apache.doris.common.UserException;
+import org.apache.doris.datasource.hive.HiveMetaStoreCache;
 import org.apache.doris.service.FrontendOptions;
 import org.apache.doris.thrift.TBrokerCheckPathExistRequest;
 import org.apache.doris.thrift.TBrokerCheckPathExistResponse;
@@ -151,7 +153,8 @@ public class BrokerUtil {
             if (index == -1) {
                 continue;
             }
-            columns[index] = pair[1];
+            columns[index] = HiveMetaStoreCache.HIVE_DEFAULT_PARTITION.equals(pair[1])
+                ? FeConstants.null_string : pair[1];
             size++;
             if (size >= columnsFromPath.size()) {
                 break;
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
index a1ab3cc92f..31b8d2f3b4 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
@@ -80,6 +80,7 @@ import java.util.stream.Stream;
 public class HiveMetaStoreCache {
     private static final Logger LOG = LogManager.getLogger(HiveMetaStoreCache.class);
     private static final int MIN_BATCH_FETCH_PARTITION_NUM = 50;
+    public static final String HIVE_DEFAULT_PARTITION = "__HIVE_DEFAULT_PARTITION__";
 
     private HMSExternalCatalog catalog;
 
@@ -207,7 +208,7 @@ public class HiveMetaStoreCache {
         for (String part : parts) {
             String[] kv = part.split("=");
             Preconditions.checkState(kv.length == 2, partitionName);
-            values.add(new PartitionValue(kv[1]));
+            values.add(new PartitionValue(kv[1], HIVE_DEFAULT_PARTITION.equals(kv[1])));
         }
         try {
             PartitionKey key = PartitionKey.createListPartitionKeyWithTypes(values, types);
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/ListPartitionPrunerV2.java b/fe/fe-core/src/main/java/org/apache/doris/planner/ListPartitionPrunerV2.java
index fcae3c4ecd..5b8ec54b3a 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/ListPartitionPrunerV2.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/ListPartitionPrunerV2.java
@@ -79,6 +79,19 @@ public class ListPartitionPrunerV2 extends PartitionPrunerV2Base {
         this.rangeToId = rangeToId;
     }
 
+    // For Hive partitioned tables.
+    public ListPartitionPrunerV2(Map<Long, PartitionItem> idToPartitionItem,
+                                 List<Column> partitionColumns,
+                                 Map<String, ColumnRange> columnNameToRange,
+                                 Map<UniqueId, Range<PartitionKey>> uidToPartitionRange,
+                                 Map<Range<PartitionKey>, UniqueId> rangeToId,
+                                 RangeMap<ColumnBound, UniqueId> singleColumnRangeMap,
+                                 boolean isHive) {
+        super(idToPartitionItem, partitionColumns, columnNameToRange, singleColumnRangeMap, isHive);
+        this.uidToPartitionRange = uidToPartitionRange;
+        this.rangeToId = rangeToId;
+    }
+
     public static Map<UniqueId, Range<PartitionKey>> genUidToPartitionRange(
             Map<Long, PartitionItem> idToPartitionItem, Map<Long, List<UniqueId>> idToUniqueIdsMap) {
         Map<UniqueId, Range<PartitionKey>> uidToPartitionRange = Maps.newHashMap();
@@ -147,6 +160,11 @@ public class ListPartitionPrunerV2 extends PartitionPrunerV2Base {
 
         Optional<RangeSet<ColumnBound>> rangeSetOpt = columnRange.getRangeSet();
         if (columnRange.hasConjunctiveIsNull() || !rangeSetOpt.isPresent()) {
+            // For Hive external tables, the partition column could be null.
+            // In that case, the data is put into the default partition __HIVE_DEFAULT_PARTITION__.
+            if (isHive) {
+                return FinalFilters.noFilters();
+            }
             return FinalFilters.constantFalseFilters();
         } else {
             RangeSet<ColumnBound> rangeSet = rangeSetOpt.get();
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/PartitionPrunerV2Base.java b/fe/fe-core/src/main/java/org/apache/doris/planner/PartitionPrunerV2Base.java
index e760a85d84..376e2a4c7f 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/PartitionPrunerV2Base.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/PartitionPrunerV2Base.java
@@ -43,6 +43,8 @@ public abstract class PartitionPrunerV2Base implements PartitionPruner {
     protected final Map<String, ColumnRange> columnNameToRange;
     // used for single column partition
     protected RangeMap<ColumnBound, UniqueId> singleColumnRangeMap = null;
+    // Flag to indicate whether this pruner is for a Hive partitioned table.
+    protected boolean isHive = false;
     // currently only used for list partition
     private Map.Entry<Long, PartitionItem> defaultPartition;
 
@@ -83,6 +85,18 @@ public abstract class PartitionPrunerV2Base implements PartitionPruner {
                                 .orElse(null);
     }
 
+    public PartitionPrunerV2Base(Map<Long, PartitionItem> idToPartitionItem,
+                                 List<Column> partitionColumns,
+                                 Map<String, ColumnRange> columnNameToRange,
+                                 RangeMap<ColumnBound, UniqueId> singleColumnRangeMap,
+                                 boolean isHive) {
+        this.idToPartitionItem = idToPartitionItem;
+        this.partitionColumns = partitionColumns;
+        this.columnNameToRange = columnNameToRange;
+        this.singleColumnRangeMap = singleColumnRangeMap;
+        this.isHive = isHive;
+    }
+
     @Override
     public Collection<Long> prune() throws AnalysisException {
         Map<Column, FinalFilters> columnToFilters = Maps.newHashMap();
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/RangePartitionPrunerV2.java b/fe/fe-core/src/main/java/org/apache/doris/planner/RangePartitionPrunerV2.java
index 4aa3ee41a5..8acba72f15 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/RangePartitionPrunerV2.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/RangePartitionPrunerV2.java
@@ -86,6 +86,11 @@ public class RangePartitionPrunerV2 extends PartitionPrunerV2Base {
         Optional<RangeSet<ColumnBound>> rangeSetOpt = columnRange.getRangeSet();
         if (columnRange.hasConjunctiveIsNull()) {
             if (!rangeSetOpt.isPresent()) {
+                // For Hive external tables, the partition column could be null.
+                // In that case, the data is put into the default partition __HIVE_DEFAULT_PARTITION__.
+                if (isHive) {
+                    return FinalFilters.noFilters();
+                }
                 // Only has conjunctive `is null` predicate.
                 return FinalFilters.create(Sets.newHashSet(getMinInfinityRange(column)));
             } else {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveScanProvider.java b/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveScanProvider.java
index e69f9788d0..f538bf3c3e 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveScanProvider.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveScanProvider.java
@@ -161,7 +161,8 @@ public class HiveScanProvider extends HMSTableScanProvider {
                         hmsTable.getPartitionColumns(), columnNameToRange,
                         hivePartitionValues.getUidToPartitionRange(),
                         hivePartitionValues.getRangeToId(),
-                        hivePartitionValues.getSingleColumnRangeMap());
+                        hivePartitionValues.getSingleColumnRangeMap(),
+                        true);
                 Collection<Long> filteredPartitionIds = pruner.prune();
                 this.readPartitionNum = filteredPartitionIds.size();
                 LOG.debug("hive partition fetch and prune for table {}.{} 
cost: {} ms",
diff --git a/regression-test/data/external_table_emr_p2/hive/test_hive_default_partition.out b/regression-test/data/external_table_emr_p2/hive/test_hive_default_partition.out
new file mode 100644
index 0000000000..3737fbc7f6
--- /dev/null
+++ b/regression-test/data/external_table_emr_p2/hive/test_hive_default_partition.out
@@ -0,0 +1,135 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !one_partition1 --
+1      1
+2      1
+3      2
+4      2
+5      \N
+6      \N
+
+-- !one_partition2 --
+5      \N
+6      \N
+
+-- !one_partition3 --
+1
+2
+3
+4
+
+-- !one_partition4 --
+1
+1
+2
+2
+
+-- !one_partition5 --
+4      2
+5      \N
+6      \N
+
+-- !two_partition1 --
+1      \N      one
+2      \N      one
+3      2       \N
+4      2       \N
+5      3       three
+6      3       three
+7      \N      \N
+8      \N      \N
+
+-- !two_partition2 --
+1      \N      one
+2      \N      one
+7      \N      \N
+8      \N      \N
+
+-- !two_partition3 --
+3      2       \N
+4      2       \N
+5      3       three
+6      3       three
+
+-- !two_partition4 --
+3      2       \N
+4      2       \N
+7      \N      \N
+8      \N      \N
+
+-- !two_partition5 --
+1      \N      one
+2      \N      one
+5      3       three
+6      3       three
+
+-- !two_partition6 --
+5      3       three
+6      3       three
+
+-- !two_partition7 --
+1      \N      one
+2      \N      one
+
+-- !two_partition8 --
+3      2       \N
+4      2       \N
+
+-- !two_partition9 --
+7      \N      \N
+8      \N      \N
+
+-- !two_partition10 --
+1      \N      one
+2      \N      one
+3      2       \N
+4      2       \N
+5      3       three
+6      3       three
+
+-- !two_partition11 --
+1      \N      one
+2      \N      one
+5      3       three
+6      3       three
+7      \N      \N
+8      \N      \N
+
+-- !two_partition12 --
+3      2       \N
+4      2       \N
+5      3       three
+6      3       three
+7      \N      \N
+8      \N      \N
+
+-- !two_partition13 --
+1      \N      one
+2      \N      one
+3      2       \N
+4      2       \N
+7      \N      \N
+8      \N      \N
+
+-- !two_partition14 --
+1      \N      one
+2      \N      one
+3      2       \N
+4      2       \N
+5      3       three
+6      3       three
+
+-- !two_partition15 --
+6      3       three
+7      \N      \N
+8      \N      \N
+
+-- !two_partition16 --
+3      2       \N
+4      2       \N
+5      3       three
+6      3       three
+
+-- !two_partition17 --
+1      \N      one
+2      \N      one
+
diff --git a/regression-test/suites/external_table_emr_p2/hive/test_hive_default_partition.groovy b/regression-test/suites/external_table_emr_p2/hive/test_hive_default_partition.groovy
new file mode 100644
index 0000000000..2deddb9d2d
--- /dev/null
+++ b/regression-test/suites/external_table_emr_p2/hive/test_hive_default_partition.groovy
@@ -0,0 +1,85 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_hive_default_partition", "p2") {
+    def one_partition1 = """select * from one_partition order by id;"""
+    def one_partition2 = """select id, part1 from one_partition where part1 is null order by id;"""
+    def one_partition3 = """select id from one_partition where part1 is not null order by id;"""
+    def one_partition4 = """select part1 from one_partition where part1>0 order by id;"""
+    def one_partition5 = """select id, part1 from one_partition where part1 is null or id>3 order by id;"""
+
+    def two_partition1 = """select * from two_partition order by id;"""
+    def two_partition2 = """select id, part1, part2 from two_partition where part1 is null order by id;"""
+    def two_partition3 = """select id, part1, part2 from two_partition where part1 is not null order by id;"""
+    def two_partition4 = """select id, part1, part2 from two_partition where part2 is null order by id;"""
+    def two_partition5 = """select id, part1, part2 from two_partition where part2 is not null order by id;"""
+    def two_partition6 = """select id, part1, part2 from two_partition where part1 is not null and part2 is not null order by id;"""
+    def two_partition7 = """select id, part1, part2 from two_partition where part1 is null and part2 is not null order by id;"""
+    def two_partition8 = """select id, part1, part2 from two_partition where part1 is not null and part2 is null order by id;"""
+    def two_partition9 = """select id, part1, part2 from two_partition where part1 is null and part2 is null order by id;"""
+    def two_partition10 = """select id, part1, part2 from two_partition where part1 is not null or part2 is not null order by id;"""
+    def two_partition11 = """select id, part1, part2 from two_partition where part1 is null or part2 is not null order by id;"""
+    def two_partition12 = """select id, part1, part2 from two_partition where part1 is not null or part2 is null order by id;"""
+    def two_partition13 = """select id, part1, part2 from two_partition where part1 is null or part2 is null order by id;"""
+    def two_partition14 = """select id, part1, part2 from two_partition where part1 is not null or part2 is not null order by id;"""
+    def two_partition15 = """select id, part1, part2 from two_partition where id > 5 order by id;"""
+    def two_partition16 = """select id, part1, part2 from two_partition where part1>0 order by id;"""
+    def two_partition17 = """select id, part1, part2 from two_partition where part2 = 'one' order by id;"""
+
+    String enabled = context.config.otherConfigs.get("enableExternalHiveTest")
+    if (enabled != null && enabled.equalsIgnoreCase("true")) {
+        String extHiveHmsHost = context.config.otherConfigs.get("extHiveHmsHost")
+        String extHiveHmsPort = context.config.otherConfigs.get("extHiveHmsPort")
+        String catalog_name = "hive_default_partition"
+        sql """drop catalog if exists ${catalog_name};"""
+        sql """
+            create catalog if not exists ${catalog_name} properties (
+                'type'='hms',
+                'hive.metastore.uris' = 'thrift://${extHiveHmsHost}:${extHiveHmsPort}'
+            );
+        """
+        logger.info("catalog " + catalog_name + " created")
+        sql """switch ${catalog_name};"""
+        logger.info("switched to catalog " + catalog_name)
+        sql """use multi_catalog;"""
+        qt_one_partition1 one_partition1
+        qt_one_partition2 one_partition2
+        qt_one_partition3 one_partition3
+        qt_one_partition4 one_partition4
+        qt_one_partition5 one_partition5
+
+        qt_two_partition1 two_partition1
+        qt_two_partition2 two_partition2
+        qt_two_partition3 two_partition3
+        qt_two_partition4 two_partition4
+        qt_two_partition5 two_partition5
+        qt_two_partition6 two_partition6
+        qt_two_partition7 two_partition7
+        qt_two_partition8 two_partition8
+        qt_two_partition9 two_partition9
+        qt_two_partition10 two_partition10
+        qt_two_partition11 two_partition11
+        qt_two_partition12 two_partition12
+        qt_two_partition13 two_partition13
+        qt_two_partition14 two_partition14
+        qt_two_partition15 two_partition15
+        qt_two_partition16 two_partition16
+        qt_two_partition17 two_partition17
+
+    }
+}
+


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]
