This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new a444cef5723 [opt](hive) skip hidden file and dir (#32412)
a444cef5723 is described below

commit a444cef5723efdd1e593dfa4c1e5cb949b7f0cef
Author: Mingyu Chen <morning...@163.com>
AuthorDate: Tue Mar 19 14:26:10 2024 +0800

    [opt](hive) skip hidden file and dir (#32412)
    
    When querying a Hive table, we should skip all hidden dirs and files, such as:
    ```
    /visible/.hidden/path
    /visible/.hidden.txt
    ```
---
 .../parquet_all_types/.test_hidden_dir/wrong_file  |  1 +
 .../test_hidden_file/.hidden_file                  |  1 +
 .../doris/datasource/hive/HiveMetaStoreCache.java  | 24 ++++++++---
 .../apache/doris/datasource/PathVisibleTest.java   | 47 ++++++++++++++++++++++
 4 files changed, 68 insertions(+), 5 deletions(-)

diff --git 
a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_all_types/.test_hidden_dir/wrong_file
 
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_all_types/.test_hidden_dir/wrong_file
new file mode 100644
index 00000000000..d37af4bbc51
--- /dev/null
+++ 
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_all_types/.test_hidden_dir/wrong_file
@@ -0,0 +1 @@
+wrong file
diff --git 
a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_all_types/test_hidden_file/.hidden_file
 
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_all_types/test_hidden_file/.hidden_file
new file mode 100644
index 00000000000..136c05e0d02
--- /dev/null
+++ 
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_all_types/test_hidden_file/.hidden_file
@@ -0,0 +1 @@
+hidden
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
index f2a7019709d..23c9de3be1f 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
@@ -50,6 +50,7 @@ import org.apache.doris.planner.ColumnBound;
 import org.apache.doris.planner.ListPartitionPrunerV2;
 import org.apache.doris.planner.PartitionPrunerV2Base.UniqueId;
 
+import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Preconditions;
 import com.google.common.base.Strings;
 import com.google.common.cache.CacheBuilder;
@@ -63,7 +64,6 @@ import com.google.common.collect.RangeMap;
 import com.google.common.collect.Streams;
 import com.google.common.collect.TreeRangeMap;
 import lombok.Data;
-import org.apache.commons.lang3.StringUtils;
 import org.apache.commons.lang3.math.NumberUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.BlockLocation;
@@ -1035,14 +1035,16 @@ public class HiveMetaStoreCache {
             this.acidInfo = acidInfo;
         }
 
-        private boolean isFileVisible(Path path) {
-            if (path == null || StringUtils.isEmpty(path.toString())) {
+        @VisibleForTesting
+        public static boolean isFileVisible(Path path) {
+            if (path == null) {
                 return false;
             }
-            if (path.getName().startsWith(".") || 
path.getName().startsWith("_")) {
+            String pathStr = path.toUri().toString();
+            if (containsHiddenPath(pathStr) || path.getName().startsWith("_")) 
{
                 return false;
             }
-            for (String name : path.toString().split("/")) {
+            for (String name : pathStr.split("/")) {
                 if (isGeneratedPath(name)) {
                     return false;
                 }
@@ -1050,6 +1052,18 @@ public class HiveMetaStoreCache {
             return true;
         }
 
+        private static boolean containsHiddenPath(String path) {
+            if (path.startsWith(".")) {
+                return true;
+            }
+            for (int i = 0; i < path.length() - 1; i++) {
+                if (path.charAt(i) == '/' && path.charAt(i + 1) == '.') {
+                    return true;
+                }
+            }
+            return false;
+        }
+
         private static boolean isGeneratedPath(String name) {
             return "_temporary".equals(name) // generated by spark
                     || "_imapala_insert_staging".equals(name) // generated by 
impala
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/datasource/PathVisibleTest.java 
b/fe/fe-core/src/test/java/org/apache/doris/datasource/PathVisibleTest.java
new file mode 100644
index 00000000000..0937bbc3cc8
--- /dev/null
+++ b/fe/fe-core/src/test/java/org/apache/doris/datasource/PathVisibleTest.java
@@ -0,0 +1,47 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.datasource;
+
+import org.apache.doris.datasource.hive.HiveMetaStoreCache.FileCacheValue;
+
+import org.apache.hadoop.fs.Path;
+import org.junit.Assert;
+import org.junit.Test;
+
+public class PathVisibleTest {
+    @Test
+    public void shouldReturnFalseWhenPathIsNull() {
+        Assert.assertFalse(FileCacheValue.isFileVisible(null));
+        Assert.assertFalse(FileCacheValue.isFileVisible(new 
Path("s3://visible/.hidden/path")));
+        Assert.assertFalse(FileCacheValue.isFileVisible(new 
Path("/visible/.hidden/path")));
+        Assert.assertFalse(FileCacheValue.isFileVisible(new 
Path("hdfs://visible/path/.file")));
+        Assert.assertFalse(FileCacheValue.isFileVisible(new 
Path("/visible/path/_temporary_xx")));
+        Assert.assertFalse(FileCacheValue.isFileVisible(new 
Path("/visible/path/_imapala_insert_staging")));
+
+        Assert.assertFalse(FileCacheValue.isFileVisible(new 
Path("/visible//.hidden/path")));
+        Assert.assertFalse(FileCacheValue.isFileVisible(new 
Path("s3://visible/.hidden/path")));
+        Assert.assertFalse(FileCacheValue.isFileVisible(new 
Path("///visible/path/.file")));
+        Assert.assertFalse(FileCacheValue.isFileVisible(new 
Path("/visible/path///_temporary_xx")));
+        Assert.assertFalse(FileCacheValue.isFileVisible(new 
Path("hdfs://visible//path/_imapala_insert_staging")));
+
+        Assert.assertTrue(FileCacheValue.isFileVisible(new 
Path("s3://visible/path")));
+        Assert.assertTrue(FileCacheValue.isFileVisible(new Path("path")));
+        Assert.assertTrue(FileCacheValue.isFileVisible(new 
Path("hdfs://visible/path./1.txt")));
+        Assert.assertTrue(FileCacheValue.isFileVisible(new Path("/1.txt")));
+    }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to