This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new a444cef5723 [opt](hive) skip hidden file and dir (#32412) a444cef5723 is described below commit a444cef5723efdd1e593dfa4c1e5cb949b7f0cef Author: Mingyu Chen <morning...@163.com> AuthorDate: Tue Mar 19 14:26:10 2024 +0800 [opt](hive) skip hidden file and dir (#32412) When querying a Hive table, we should skip all hidden dirs and files, like: ``` /visible/.hidden/path /visible/.hidden.txt ``` --- .../parquet_all_types/.test_hidden_dir/wrong_file | 1 + .../test_hidden_file/.hidden_file | 1 + .../doris/datasource/hive/HiveMetaStoreCache.java | 24 ++++++++--- .../apache/doris/datasource/PathVisibleTest.java | 47 ++++++++++++++++++++++ 4 files changed, 68 insertions(+), 5 deletions(-) diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_all_types/.test_hidden_dir/wrong_file b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_all_types/.test_hidden_dir/wrong_file new file mode 100644 index 00000000000..d37af4bbc51 --- /dev/null +++ b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_all_types/.test_hidden_dir/wrong_file @@ -0,0 +1 @@ +wrong file diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_all_types/test_hidden_file/.hidden_file b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_all_types/test_hidden_file/.hidden_file new file mode 100644 index 00000000000..136c05e0d02 --- /dev/null +++ b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_all_types/test_hidden_file/.hidden_file @@ -0,0 +1 @@ +hidden diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java index f2a7019709d..23c9de3be1f 100644 --- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java @@ -50,6 +50,7 @@ import org.apache.doris.planner.ColumnBound; import org.apache.doris.planner.ListPartitionPrunerV2; import org.apache.doris.planner.PartitionPrunerV2Base.UniqueId; +import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; import com.google.common.base.Strings; import com.google.common.cache.CacheBuilder; @@ -63,7 +64,6 @@ import com.google.common.collect.RangeMap; import com.google.common.collect.Streams; import com.google.common.collect.TreeRangeMap; import lombok.Data; -import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.math.NumberUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.BlockLocation; @@ -1035,14 +1035,16 @@ public class HiveMetaStoreCache { this.acidInfo = acidInfo; } - private boolean isFileVisible(Path path) { - if (path == null || StringUtils.isEmpty(path.toString())) { + @VisibleForTesting + public static boolean isFileVisible(Path path) { + if (path == null) { return false; } - if (path.getName().startsWith(".") || path.getName().startsWith("_")) { + String pathStr = path.toUri().toString(); + if (containsHiddenPath(pathStr) || path.getName().startsWith("_")) { return false; } - for (String name : path.toString().split("/")) { + for (String name : pathStr.split("/")) { if (isGeneratedPath(name)) { return false; } @@ -1050,6 +1052,18 @@ public class HiveMetaStoreCache { return true; } + private static boolean containsHiddenPath(String path) { + if (path.startsWith(".")) { + return true; + } + for (int i = 0; i < path.length() - 1; i++) { + if (path.charAt(i) == '/' && path.charAt(i + 1) == '.') { + return true; + } + } + return false; + } + private static boolean isGeneratedPath(String name) { return "_temporary".equals(name) // generated by spark || 
"_imapala_insert_staging".equals(name) // generated by impala diff --git a/fe/fe-core/src/test/java/org/apache/doris/datasource/PathVisibleTest.java b/fe/fe-core/src/test/java/org/apache/doris/datasource/PathVisibleTest.java new file mode 100644 index 00000000000..0937bbc3cc8 --- /dev/null +++ b/fe/fe-core/src/test/java/org/apache/doris/datasource/PathVisibleTest.java @@ -0,0 +1,47 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.datasource; + +import org.apache.doris.datasource.hive.HiveMetaStoreCache.FileCacheValue; + +import org.apache.hadoop.fs.Path; +import org.junit.Assert; +import org.junit.Test; + +public class PathVisibleTest { + @Test + public void shouldReturnFalseWhenPathIsNull() { + Assert.assertFalse(FileCacheValue.isFileVisible(null)); + Assert.assertFalse(FileCacheValue.isFileVisible(new Path("s3://visible/.hidden/path"))); + Assert.assertFalse(FileCacheValue.isFileVisible(new Path("/visible/.hidden/path"))); + Assert.assertFalse(FileCacheValue.isFileVisible(new Path("hdfs://visible/path/.file"))); + Assert.assertFalse(FileCacheValue.isFileVisible(new Path("/visible/path/_temporary_xx"))); + Assert.assertFalse(FileCacheValue.isFileVisible(new Path("/visible/path/_imapala_insert_staging"))); + + Assert.assertFalse(FileCacheValue.isFileVisible(new Path("/visible//.hidden/path"))); + Assert.assertFalse(FileCacheValue.isFileVisible(new Path("s3://visible/.hidden/path"))); + Assert.assertFalse(FileCacheValue.isFileVisible(new Path("///visible/path/.file"))); + Assert.assertFalse(FileCacheValue.isFileVisible(new Path("/visible/path///_temporary_xx"))); + Assert.assertFalse(FileCacheValue.isFileVisible(new Path("hdfs://visible//path/_imapala_insert_staging"))); + + Assert.assertTrue(FileCacheValue.isFileVisible(new Path("s3://visible/path"))); + Assert.assertTrue(FileCacheValue.isFileVisible(new Path("path"))); + Assert.assertTrue(FileCacheValue.isFileVisible(new Path("hdfs://visible/path./1.txt"))); + Assert.assertTrue(FileCacheValue.isFileVisible(new Path("/1.txt"))); + } +} --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org