This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit a9db5f1d72d637b499b670bb5442973f8ae3b116
Author: Csaba Ringhofer <[email protected]>
AuthorDate: Fri Feb 21 16:53:14 2025 +0100

    IMPALA-13778: Update ignored_dir_prefix_list
    
    There are some missing prefixes that are not yet ignored during
    recursive listing:
    - -tmp. (Hive changed to this new tmp dir from _tmp. in HIVE-27536)
    - _impala_insert_staging
    
    Impala may run into these directories often, so it is better to
    ignore them by default.
    
    Change-Id: I023244525dd333af1c5bfbad06708f3ec86aeacf
    Reviewed-on: http://gerrit.cloudera.org:8080/22518
    Reviewed-by: Impala Public Jenkins <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 be/src/util/backend-gflag-util.cc                      |  3 ++-
 .../java/org/apache/impala/common/FileSystemUtil.java  |  4 ++++
 tests/metadata/test_recursive_listing.py               | 18 ++++++++++--------
 3 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/be/src/util/backend-gflag-util.cc b/be/src/util/backend-gflag-util.cc
index 292a050db..ccc8feb95 100644
--- a/be/src/util/backend-gflag-util.cc
+++ b/be/src/util/backend-gflag-util.cc
@@ -222,7 +222,8 @@ DEFINE_bool(use_hms_column_order_for_hbase_tables, false,
     "Use the column order in HMS for HBase tables instead of ordering the columns by "
     "family/qualifier. Keeping the default as false for backward compatibility.");
 
-DEFINE_string(ignored_dir_prefix_list, ".,_tmp.,_spark_metadata",
+DEFINE_string(ignored_dir_prefix_list,
+    ".,_tmp.,-tmp.,_spark_metadata,_impala_insert_staging",
     "Comma separated list to specify the prefix for tmp/staging dirs that catalogd should"
     " skip in loading file metadata.");
 
diff --git a/fe/src/main/java/org/apache/impala/common/FileSystemUtil.java b/fe/src/main/java/org/apache/impala/common/FileSystemUtil.java
index 1d8d8c437..e17880db1 100644
--- a/fe/src/main/java/org/apache/impala/common/FileSystemUtil.java
+++ b/fe/src/main/java/org/apache/impala/common/FileSystemUtil.java
@@ -1044,7 +1044,9 @@ public class FileSystemUtil {
 
   public static final String DOT = ".";
   public static final String HIVE_TEMP_FILE_PREFIX = "_tmp.";
+  public static final String HIVE_NEW_TEMP_FILE_PREFIX = "-tmp.";
   public static final String SPARK_TEMP_FILE_PREFIX = "_spark_metadata";
+  public static final String IMPALA_STAGING_DIR_PREFIX = "_impala_insert_staging";
 
   /**
    * Prefix string used by tools like hive/spark/flink to write certain temporary or
@@ -1059,7 +1061,9 @@ public class FileSystemUtil {
         || BackendConfig.INSTANCE.getIgnoredDirPrefixList() == null) {
       TMP_DIR_PREFIX_LIST.add(DOT);
       TMP_DIR_PREFIX_LIST.add(HIVE_TEMP_FILE_PREFIX);
+      TMP_DIR_PREFIX_LIST.add(HIVE_NEW_TEMP_FILE_PREFIX);
       TMP_DIR_PREFIX_LIST.add(SPARK_TEMP_FILE_PREFIX);
+      TMP_DIR_PREFIX_LIST.add(IMPALA_STAGING_DIR_PREFIX);
       LOG.warn("BackendConfig.INSTANCE uninitialized. Use hard-coded prefix-list.");
     } else {
       String s = BackendConfig.INSTANCE.getIgnoredDirPrefixList();
diff --git a/tests/metadata/test_recursive_listing.py b/tests/metadata/test_recursive_listing.py
index 9ec8bc135..957575fb2 100644
--- a/tests/metadata/test_recursive_listing.py
+++ b/tests/metadata/test_recursive_listing.py
@@ -108,14 +108,16 @@ class TestRecursiveListing(ImpalaTestSuite):
 
     # Create files in the nested hidden directories and refresh. Make sure it does not
     # show up
-    self.filesystem_client.make_dir("{0}/.hive-staging".format(part_path))
-    self.filesystem_client.create_file(
-        "{0}/.hive-staging/file3.txt".format(part_path),
-        "data-should-be-ignored-by-impala")
-    self.filesystem_client.make_dir("{0}/_tmp.base_000000_1".format(part_path))
-    self.filesystem_client.create_file(
-        "{0}/_tmp.base_000000_1/000000_0.manifest".format(part_path),
-        "manifest-file_contents")
+    dir_file_list = [
+      (".hive-staging", "file3.txt"),
+      ("_tmp.base_000000_1", "000000_0.manifest"),
+      ("-tmp.base_000000_1", "000000_0.manifest"),
+      ("_impala_insert_staging", "bc4e15747fc7d788-f632b3b300000000_944410164_data.0.txt")
+    ]
+    for (dir, file) in dir_file_list:
+      self.filesystem_client.make_dir("{0}/{1}".format(part_path, dir))
+      self.filesystem_client.create_file(
+          "{0}/{1}/{2}".format(part_path, dir, file), "shouldntreadthis")
     self.execute_query_expect_success(self.client, "refresh {0}".format(fq_tbl_name))
     assert len(self._show_files(fq_tbl_name)) == 3
     assert len(self._get_rows(fq_tbl_name)) == 3

Reply via email to