This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new dd1bd6d8f1 [Fix](multi catalog)Support hive default partition. (#17179)
dd1bd6d8f1 is described below
commit dd1bd6d8f17466e9390b5d5fecc2d6d90b4a6b3e
Author: Jibing-Li <[email protected]>
AuthorDate: Tue Feb 28 00:08:29 2023 +0800
[Fix](multi catalog)Support hive default partition. (#17179)
Hive stores all the data that lacks partition column values in a default
partition named __HIVE_DEFAULT_PARTITION__.
Doris will fail to read this partition when the partition column type is
INT or another type that the string
__HIVE_DEFAULT_PARTITION__ cannot be converted to.
This PR adds support for the Hive default partition by setting the column
value to NULL for the missing partition columns.
---
.../org/apache/doris/analysis/PartitionValue.java | 13 ++
.../org/apache/doris/common/util/BrokerUtil.java | 5 +-
.../doris/datasource/hive/HiveMetaStoreCache.java | 3 +-
.../doris/planner/ListPartitionPrunerV2.java | 18 +++
.../doris/planner/PartitionPrunerV2Base.java | 14 +++
.../doris/planner/RangePartitionPrunerV2.java | 5 +
.../doris/planner/external/HiveScanProvider.java | 3 +-
.../hive/test_hive_default_partition.out | 135 +++++++++++++++++++++
.../hive/test_hive_default_partition.groovy | 85 +++++++++++++
9 files changed, 278 insertions(+), 3 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/analysis/PartitionValue.java
b/fe/fe-core/src/main/java/org/apache/doris/analysis/PartitionValue.java
index f61c4be669..b273b798a3 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/PartitionValue.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/PartitionValue.java
@@ -24,6 +24,7 @@ public class PartitionValue {
public static final PartitionValue MAX_VALUE = new PartitionValue();
private String value;
+ private boolean isHiveDefaultPartition;
private PartitionValue() {
@@ -33,7 +34,15 @@ public class PartitionValue {
this.value = value;
}
+ public PartitionValue(String value, boolean isHiveDefaultPartition) {
+ this.value = value;
+ this.isHiveDefaultPartition = isHiveDefaultPartition;
+ }
+
public LiteralExpr getValue(Type type) throws AnalysisException {
+ if (isHiveDefaultPartition) {
+ return new StringLiteral(value);
+ }
if (isMax()) {
return LiteralExpr.createInfinity(type, true);
} else {
@@ -52,4 +61,8 @@ public class PartitionValue {
return value;
}
}
+
+ public boolean isHiveDefaultPartition() {
+ return isHiveDefaultPartition;
+ }
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/common/util/BrokerUtil.java
b/fe/fe-core/src/main/java/org/apache/doris/common/util/BrokerUtil.java
index f11c5fe29d..b41c16e6ba 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/common/util/BrokerUtil.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/BrokerUtil.java
@@ -26,8 +26,10 @@ import org.apache.doris.catalog.FsBroker;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.common.ClientPool;
import org.apache.doris.common.Config;
+import org.apache.doris.common.FeConstants;
import org.apache.doris.common.Pair;
import org.apache.doris.common.UserException;
+import org.apache.doris.datasource.hive.HiveMetaStoreCache;
import org.apache.doris.service.FrontendOptions;
import org.apache.doris.thrift.TBrokerCheckPathExistRequest;
import org.apache.doris.thrift.TBrokerCheckPathExistResponse;
@@ -151,7 +153,8 @@ public class BrokerUtil {
if (index == -1) {
continue;
}
- columns[index] = pair[1];
+ columns[index] =
HiveMetaStoreCache.HIVE_DEFAULT_PARTITION.equals(pair[1])
+ ? FeConstants.null_string : pair[1];
size++;
if (size >= columnsFromPath.size()) {
break;
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
index a1ab3cc92f..31b8d2f3b4 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
@@ -80,6 +80,7 @@ import java.util.stream.Stream;
public class HiveMetaStoreCache {
private static final Logger LOG =
LogManager.getLogger(HiveMetaStoreCache.class);
private static final int MIN_BATCH_FETCH_PARTITION_NUM = 50;
+ public static final String HIVE_DEFAULT_PARTITION =
"__HIVE_DEFAULT_PARTITION__";
private HMSExternalCatalog catalog;
@@ -207,7 +208,7 @@ public class HiveMetaStoreCache {
for (String part : parts) {
String[] kv = part.split("=");
Preconditions.checkState(kv.length == 2, partitionName);
- values.add(new PartitionValue(kv[1]));
+ values.add(new PartitionValue(kv[1],
HIVE_DEFAULT_PARTITION.equals(kv[1])));
}
try {
PartitionKey key =
PartitionKey.createListPartitionKeyWithTypes(values, types);
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/planner/ListPartitionPrunerV2.java
b/fe/fe-core/src/main/java/org/apache/doris/planner/ListPartitionPrunerV2.java
index fcae3c4ecd..5b8ec54b3a 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/planner/ListPartitionPrunerV2.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/planner/ListPartitionPrunerV2.java
@@ -79,6 +79,19 @@ public class ListPartitionPrunerV2 extends
PartitionPrunerV2Base {
this.rangeToId = rangeToId;
}
+ // For hive partition table.
+ public ListPartitionPrunerV2(Map<Long, PartitionItem> idToPartitionItem,
+ List<Column> partitionColumns,
+ Map<String, ColumnRange> columnNameToRange,
+ Map<UniqueId, Range<PartitionKey>>
uidToPartitionRange,
+ Map<Range<PartitionKey>, UniqueId> rangeToId,
+ RangeMap<ColumnBound, UniqueId>
singleColumnRangeMap,
+ boolean isHive) {
+ super(idToPartitionItem, partitionColumns, columnNameToRange,
singleColumnRangeMap, isHive);
+ this.uidToPartitionRange = uidToPartitionRange;
+ this.rangeToId = rangeToId;
+ }
+
public static Map<UniqueId, Range<PartitionKey>> genUidToPartitionRange(
Map<Long, PartitionItem> idToPartitionItem, Map<Long,
List<UniqueId>> idToUniqueIdsMap) {
Map<UniqueId, Range<PartitionKey>> uidToPartitionRange =
Maps.newHashMap();
@@ -147,6 +160,11 @@ public class ListPartitionPrunerV2 extends
PartitionPrunerV2Base {
Optional<RangeSet<ColumnBound>> rangeSetOpt =
columnRange.getRangeSet();
if (columnRange.hasConjunctiveIsNull() || !rangeSetOpt.isPresent()) {
+ // For Hive external table, partition column could be null.
+ // In which case, the data will be put to a default partition
__HIVE_DEFAULT_PARTITION__
+ if (isHive) {
+ return FinalFilters.noFilters();
+ }
return FinalFilters.constantFalseFilters();
} else {
RangeSet<ColumnBound> rangeSet = rangeSetOpt.get();
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/planner/PartitionPrunerV2Base.java
b/fe/fe-core/src/main/java/org/apache/doris/planner/PartitionPrunerV2Base.java
index e760a85d84..376e2a4c7f 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/planner/PartitionPrunerV2Base.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/planner/PartitionPrunerV2Base.java
@@ -43,6 +43,8 @@ public abstract class PartitionPrunerV2Base implements
PartitionPruner {
protected final Map<String, ColumnRange> columnNameToRange;
// used for single column partition
protected RangeMap<ColumnBound, UniqueId> singleColumnRangeMap = null;
+ // Flag to indicate if this pruner is for hive partition or not.
+ protected boolean isHive = false;
// currently only used for list partition
private Map.Entry<Long, PartitionItem> defaultPartition;
@@ -83,6 +85,18 @@ public abstract class PartitionPrunerV2Base implements
PartitionPruner {
.orElse(null);
}
+ public PartitionPrunerV2Base(Map<Long, PartitionItem> idToPartitionItem,
+ List<Column> partitionColumns,
+ Map<String, ColumnRange> columnNameToRange,
+ RangeMap<ColumnBound, UniqueId>
singleColumnRangeMap,
+ boolean isHive) {
+ this.idToPartitionItem = idToPartitionItem;
+ this.partitionColumns = partitionColumns;
+ this.columnNameToRange = columnNameToRange;
+ this.singleColumnRangeMap = singleColumnRangeMap;
+ this.isHive = isHive;
+ }
+
@Override
public Collection<Long> prune() throws AnalysisException {
Map<Column, FinalFilters> columnToFilters = Maps.newHashMap();
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/planner/RangePartitionPrunerV2.java
b/fe/fe-core/src/main/java/org/apache/doris/planner/RangePartitionPrunerV2.java
index 4aa3ee41a5..8acba72f15 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/planner/RangePartitionPrunerV2.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/planner/RangePartitionPrunerV2.java
@@ -86,6 +86,11 @@ public class RangePartitionPrunerV2 extends
PartitionPrunerV2Base {
Optional<RangeSet<ColumnBound>> rangeSetOpt =
columnRange.getRangeSet();
if (columnRange.hasConjunctiveIsNull()) {
if (!rangeSetOpt.isPresent()) {
+ // For Hive external table, partition column could be null.
+ // In which case, the data will be put to a default partition
__HIVE_DEFAULT_PARTITION__
+ if (isHive) {
+ return FinalFilters.noFilters();
+ }
// Only has conjunctive `is null` predicate.
return
FinalFilters.create(Sets.newHashSet(getMinInfinityRange(column)));
} else {
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveScanProvider.java
b/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveScanProvider.java
index e69f9788d0..f538bf3c3e 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveScanProvider.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveScanProvider.java
@@ -161,7 +161,8 @@ public class HiveScanProvider extends HMSTableScanProvider {
hmsTable.getPartitionColumns(), columnNameToRange,
hivePartitionValues.getUidToPartitionRange(),
hivePartitionValues.getRangeToId(),
- hivePartitionValues.getSingleColumnRangeMap());
+ hivePartitionValues.getSingleColumnRangeMap(),
+ true);
Collection<Long> filteredPartitionIds = pruner.prune();
this.readPartitionNum = filteredPartitionIds.size();
LOG.debug("hive partition fetch and prune for table {}.{}
cost: {} ms",
diff --git
a/regression-test/data/external_table_emr_p2/hive/test_hive_default_partition.out
b/regression-test/data/external_table_emr_p2/hive/test_hive_default_partition.out
new file mode 100644
index 0000000000..3737fbc7f6
--- /dev/null
+++
b/regression-test/data/external_table_emr_p2/hive/test_hive_default_partition.out
@@ -0,0 +1,135 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !one_partition1 --
+1 1
+2 1
+3 2
+4 2
+5 \N
+6 \N
+
+-- !one_partition2 --
+5 \N
+6 \N
+
+-- !one_partition3 --
+1
+2
+3
+4
+
+-- !one_partition4 --
+1
+1
+2
+2
+
+-- !one_partition5 --
+4 2
+5 \N
+6 \N
+
+-- !two_partition1 --
+1 \N one
+2 \N one
+3 2 \N
+4 2 \N
+5 3 three
+6 3 three
+7 \N \N
+8 \N \N
+
+-- !two_partition2 --
+1 \N one
+2 \N one
+7 \N \N
+8 \N \N
+
+-- !two_partition3 --
+3 2 \N
+4 2 \N
+5 3 three
+6 3 three
+
+-- !two_partition4 --
+3 2 \N
+4 2 \N
+7 \N \N
+8 \N \N
+
+-- !two_partition5 --
+1 \N one
+2 \N one
+5 3 three
+6 3 three
+
+-- !two_partition6 --
+5 3 three
+6 3 three
+
+-- !two_partition7 --
+1 \N one
+2 \N one
+
+-- !two_partition8 --
+3 2 \N
+4 2 \N
+
+-- !two_partition9 --
+7 \N \N
+8 \N \N
+
+-- !two_partition10 --
+1 \N one
+2 \N one
+3 2 \N
+4 2 \N
+5 3 three
+6 3 three
+
+-- !two_partition11 --
+1 \N one
+2 \N one
+5 3 three
+6 3 three
+7 \N \N
+8 \N \N
+
+-- !two_partition12 --
+3 2 \N
+4 2 \N
+5 3 three
+6 3 three
+7 \N \N
+8 \N \N
+
+-- !two_partition13 --
+1 \N one
+2 \N one
+3 2 \N
+4 2 \N
+7 \N \N
+8 \N \N
+
+-- !two_partition14 --
+1 \N one
+2 \N one
+3 2 \N
+4 2 \N
+5 3 three
+6 3 three
+
+-- !two_partition15 --
+6 3 three
+7 \N \N
+8 \N \N
+
+-- !two_partition16 --
+3 2 \N
+4 2 \N
+5 3 three
+6 3 three
+
+-- !two_partition17 --
+1 \N one
+2 \N one
+
diff --git
a/regression-test/suites/external_table_emr_p2/hive/test_hive_default_partition.groovy
b/regression-test/suites/external_table_emr_p2/hive/test_hive_default_partition.groovy
new file mode 100644
index 0000000000..2deddb9d2d
--- /dev/null
+++
b/regression-test/suites/external_table_emr_p2/hive/test_hive_default_partition.groovy
@@ -0,0 +1,85 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_hive_default_partition", "p2") {
+ def one_partition1 = """select * from one_partition order by id;"""
+ def one_partition2 = """select id, part1 from one_partition where part1 is
null order by id;"""
+ def one_partition3 = """select id from one_partition where part1 is not
null order by id;"""
+ def one_partition4 = """select part1 from one_partition where part1>0
order by id;"""
+ def one_partition5 = """select id, part1 from one_partition where part1 is
null or id>3 order by id;"""
+
+ def two_partition1 = """select * from two_partition order by id;"""
+ def two_partition2 = """select id, part1, part2 from two_partition where
part1 is null order by id;"""
+ def two_partition3 = """select id, part1, part2 from two_partition where
part1 is not null order by id;"""
+ def two_partition4 = """select id, part1, part2 from two_partition where
part2 is null order by id;"""
+ def two_partition5 = """select id, part1, part2 from two_partition where
part2 is not null order by id;"""
+ def two_partition6 = """select id, part1, part2 from two_partition where
part1 is not null and part2 is not null order by id;"""
+ def two_partition7 = """select id, part1, part2 from two_partition where
part1 is null and part2 is not null order by id;"""
+ def two_partition8 = """select id, part1, part2 from two_partition where
part1 is not null and part2 is null order by id;"""
+ def two_partition9 = """select id, part1, part2 from two_partition where
part1 is null and part2 is null order by id;"""
+ def two_partition10 = """select id, part1, part2 from two_partition where
part1 is not null or part2 is not null order by id;"""
+ def two_partition11 = """select id, part1, part2 from two_partition where
part1 is null or part2 is not null order by id;"""
+ def two_partition12 = """select id, part1, part2 from two_partition where
part1 is not null or part2 is null order by id;"""
+ def two_partition13 = """select id, part1, part2 from two_partition where
part1 is null or part2 is null order by id;"""
+ def two_partition14 = """select id, part1, part2 from two_partition where
part1 is not null or part2 is not null order by id;"""
+ def two_partition15 = """select id, part1, part2 from two_partition where
id > 5 order by id;"""
+ def two_partition16 = """select id, part1, part2 from two_partition where
part1>0 order by id;"""
+ def two_partition17 = """select id, part1, part2 from two_partition where
part2 = 'one' order by id;"""
+
+ String enabled = context.config.otherConfigs.get("enableExternalHiveTest")
+ if (enabled != null && enabled.equalsIgnoreCase("true")) {
+ String extHiveHmsHost =
context.config.otherConfigs.get("extHiveHmsHost")
+ String extHiveHmsPort =
context.config.otherConfigs.get("extHiveHmsPort")
+ String catalog_name = "hive_default_partition"
+ sql """drop catalog if exists ${catalog_name};"""
+ sql """
+ create catalog if not exists ${catalog_name} properties (
+ 'type'='hms',
+ 'hive.metastore.uris' =
'thrift://${extHiveHmsHost}:${extHiveHmsPort}'
+ );
+ """
+ logger.info("catalog " + catalog_name + " created")
+ sql """switch ${catalog_name};"""
+ logger.info("switched to catalog " + catalog_name)
+ sql """use multi_catalog;"""
+ qt_one_partition1 one_partition1
+ qt_one_partition2 one_partition2
+ qt_one_partition3 one_partition3
+ qt_one_partition4 one_partition4
+ qt_one_partition5 one_partition5
+
+ qt_two_partition1 two_partition1
+ qt_two_partition2 two_partition2
+ qt_two_partition3 two_partition3
+ qt_two_partition4 two_partition4
+ qt_two_partition5 two_partition5
+ qt_two_partition6 two_partition6
+ qt_two_partition7 two_partition7
+ qt_two_partition8 two_partition8
+ qt_two_partition9 two_partition9
+ qt_two_partition10 two_partition10
+ qt_two_partition11 two_partition11
+ qt_two_partition12 two_partition12
+ qt_two_partition13 two_partition13
+ qt_two_partition14 two_partition14
+ qt_two_partition15 two_partition15
+ qt_two_partition16 two_partition16
+ qt_two_partition17 two_partition17
+
+ }
+}
+
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]