This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 9f12714d1cc7830c6ebb1759902facf6a298acbc
Author: Csaba Ringhofer <[email protected]>
AuthorDate: Tue Jul 1 18:23:49 2025 +0200

    IMPALA-14224: Cleanup subdirectories in TRUNCATE
    
    If an external table contains data files in subdirectories, and
    recursive listing is enabled, Impala considers the files in the
    subdirectories as part of the table. However, currently INSERT OVERWRITE
    and TRUNCATE do not always delete these files, leading to data
    corruption.
    
    This change takes care of TRUNCATE.
    
    Currently TRUNCATE can be executed in two different ways:
     - if the table is being replicated, the HMS API is used;
     - otherwise catalogd deletes the files itself.
    The two methods differ in the following ways:
     - calling HMS generates an ALTER_TABLE event;
     - calling HMS deletes files recursively, while catalogd only
       deletes files directly in the partition/table directory.
    
    This commit introduces the '--truncate_external_tables_with_hms' startup
    flag, with default value 'true'. If this flag is set to true, Impala
    always uses the HMS API for TRUNCATE operations.
    
    Note that HMS always deletes stats on TRUNCATE, so setting the
    DELETE_STATS_IN_TRUNCATE query option to false is not supported if
    '--truncate_external_tables_with_hms' is set to true: in that case
    TRUNCATE fails with an ImpalaRuntimeException.
    
    Testing:
     - Extended the tests in test_recursive_listing.py::TestRecursiveListing
       to include TRUNCATE.
     - Moved the tests that run with DELETE_STATS_IN_TRUNCATE=0 from
       truncate-table.test to truncate-table-no-delete-stats.test, which is
       run by a new custom cluster test
       (custom_cluster/test_no_delete_stats_in_truncate.py).
    
    Change-Id: Ic0fcc6cf1eca8a0bcf2f93dbb61240da05e35519
    Reviewed-on: http://gerrit.cloudera.org:8080/23166
    Reviewed-by: Impala Public Jenkins <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 be/src/catalog/catalog-server.cc                   |   4 +
 be/src/util/backend-gflag-util.cc                  |   2 +
 common/thrift/BackendGflags.thrift                 |   2 +
 .../org/apache/impala/service/BackendConfig.java   |   4 +
 .../apache/impala/service/CatalogOpExecutor.java   |  24 +++--
 .../QueryTest/truncate-table-no-delete-stats.test  | 107 ++++++++++++++++++++
 .../queries/QueryTest/truncate-table.test          | 108 +++------------------
 tests/custom_cluster/test_events_custom_configs.py |  24 ++---
 .../test_no_delete_stats_in_truncate.py            |  40 ++++++++
 tests/metadata/test_ddl.py                         |   4 +-
 tests/metadata/test_recursive_listing.py           |  15 ++-
 11 files changed, 214 insertions(+), 120 deletions(-)

diff --git a/be/src/catalog/catalog-server.cc b/be/src/catalog/catalog-server.cc
index f14406daa..44352901d 100644
--- a/be/src/catalog/catalog-server.cc
+++ b/be/src/catalog/catalog-server.cc
@@ -302,6 +302,10 @@ DEFINE_bool(keeps_warmup_tables_loaded, false,
     "--invalidate_tables_on_memory_pressure is turned on. Otherwise, these 
tables will "
     "keep being loaded and invalidated.");
 
+DEFINE_bool(truncate_external_tables_with_hms, true, "Always use HMS to truncate "
+    "external tables. When false, HMS api is only used for tables being replicated. Using"
+    " HMS has the effect of deleting files recursively and triggering an HMS event.");
+
 DECLARE_string(state_store_host);
 DECLARE_int32(state_store_port);
 DECLARE_string(state_store_2_host);
diff --git a/be/src/util/backend-gflag-util.cc 
b/be/src/util/backend-gflag-util.cc
index 1dfe632f3..90385f0f4 100644
--- a/be/src/util/backend-gflag-util.cc
+++ b/be/src/util/backend-gflag-util.cc
@@ -147,6 +147,7 @@ DECLARE_int32(reset_metadata_lock_duration_ms);
 DECLARE_int32(catalog_reset_max_threads);
 DECLARE_string(warmup_tables_config_file);
 DECLARE_bool(keeps_warmup_tables_loaded);
+DECLARE_bool(truncate_external_tables_with_hms);
 
 // HS2 SAML2.0 configuration
 // Defined here because TAG_FLAG caused issues in global-flags.cc
@@ -552,6 +553,7 @@ Status PopulateThriftBackendGflags(TBackendGflags& cfg) {
   cfg.__set_catalog_reset_max_threads(FLAGS_catalog_reset_max_threads);
   cfg.__set_warmup_tables_config_file(FLAGS_warmup_tables_config_file);
   cfg.__set_keeps_warmup_tables_loaded(FLAGS_keeps_warmup_tables_loaded);
+  
cfg.__set_truncate_external_tables_with_hms(FLAGS_truncate_external_tables_with_hms);
   return Status::OK();
 }
 
diff --git a/common/thrift/BackendGflags.thrift 
b/common/thrift/BackendGflags.thrift
index ba42add11..445a40dc2 100644
--- a/common/thrift/BackendGflags.thrift
+++ b/common/thrift/BackendGflags.thrift
@@ -349,4 +349,6 @@ struct TBackendGflags {
   158: required string warmup_tables_config_file
 
   159: required bool keeps_warmup_tables_loaded
+
+  160: required bool truncate_external_tables_with_hms
 }
diff --git a/fe/src/main/java/org/apache/impala/service/BackendConfig.java 
b/fe/src/main/java/org/apache/impala/service/BackendConfig.java
index af81e4812..a2b44df2b 100644
--- a/fe/src/main/java/org/apache/impala/service/BackendConfig.java
+++ b/fe/src/main/java/org/apache/impala/service/BackendConfig.java
@@ -597,4 +597,8 @@ public class BackendConfig {
   public boolean keepsWarmupTablesLoaded() {
     return backendCfg_.keeps_warmup_tables_loaded;
   }
+
+  public boolean truncateExternalTablesWithHms() {
+    return backendCfg_.truncate_external_tables_with_hms;
+  }
 }
diff --git a/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java 
b/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java
index f16e7b33f..76040125f 100644
--- a/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java
+++ b/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java
@@ -3655,15 +3655,25 @@ public class CatalogOpExecutor {
     catalog_.getLock().writeLock().unlock();
     modification.addCatalogServiceIdentifiersToTable();
     HdfsTable hdfsTable = (HdfsTable) table;
-    boolean isTableBeingReplicated = false;
+    boolean truncateWithHms = 
BackendConfig.INSTANCE.truncateExternalTablesWithHms();
     Stopwatch sw = Stopwatch.createStarted();
+
+    if (truncateWithHms && !params.isDelete_stats()) {
+      throw new ImpalaRuntimeException("Setting the query option "
+          + "'DELETE_STATS_IN_TRUNCATE' to false is not supported when the 
flag "
+          + "'--truncate_external_tables_with_hms' is set to true.");
+    }
+
     try {
-      // if the table is being replicated we issue the HMS API to truncate the 
table
-      // since it generates additional events which are used by Hive 
Replication.
       try (MetaStoreClient client = 
catalog_.getMetaStoreClient(catalogTimeline)) {
-        if (isTableBeingReplicated(client.getHiveClient(), hdfsTable)) {
-          isTableBeingReplicated = true;
-          // We will issue HMS API in these cases. Register in-flight event 
before we do.
+        if (!truncateWithHms) {
+          // if the table is being replicated we issue the HMS API to truncate 
the table
+          // since it generates additional events which are used by Hive 
Replication.
+          truncateWithHms = isTableBeingReplicated(client.getHiveClient(), 
hdfsTable);
+        }
+
+        if (truncateWithHms) {
+          // We will issue an HMS API call. Register in-flight event before we 
do.
           modification.registerInflightEvent();
           String dbName = 
Preconditions.checkNotNull(hdfsTable.getDb()).getName();
           client.getHiveClient()
@@ -3673,7 +3683,7 @@ public class CatalogOpExecutor {
               hdfsTable.getFullName(), sw.elapsed(TimeUnit.MILLISECONDS));
         }
       }
-      if (!isTableBeingReplicated) {
+      if (!truncateWithHms) {
         // when table is replicated we let the HMS API handle the file 
deletion logic
         // otherwise we delete the files.
         Collection<? extends FeFsPartition> parts = 
hdfsTable.loadAllPartitions();
diff --git 
a/testdata/workloads/functional-query/queries/QueryTest/truncate-table-no-delete-stats.test
 
b/testdata/workloads/functional-query/queries/QueryTest/truncate-table-no-delete-stats.test
new file mode 100644
index 000000000..b69ff3e23
--- /dev/null
+++ 
b/testdata/workloads/functional-query/queries/QueryTest/truncate-table-no-delete-stats.test
@@ -0,0 +1,107 @@
+====
+---- QUERY
+# First create a partitioned table
+create table t4 like functional.alltypes
+location '$FILESYSTEM_PREFIX/test-warehouse/$DATABASE.db/t4';
+insert into t4 partition(year, month) select * from functional.alltypes;
+compute incremental stats t4;
+# if DELETE_STATS_IN_TRUNCATE is unset then truncate should not
+# delete the table statistics.
+truncate table t4;
+show table stats t4;
+---- LABELS
+YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, 
INCREMENTAL STATS, LOCATION, EC POLICY
+---- RESULTS
+'2009','1',310,0,'0B','NOT CACHED','NOT 
CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
+'2009','2',280,0,'0B','NOT CACHED','NOT 
CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
+'2009','3',310,0,'0B','NOT CACHED','NOT 
CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
+'2009','4',300,0,'0B','NOT CACHED','NOT 
CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
+'2009','5',310,0,'0B','NOT CACHED','NOT 
CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
+'2009','6',300,0,'0B','NOT CACHED','NOT 
CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
+'2009','7',310,0,'0B','NOT CACHED','NOT 
CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
+'2009','8',310,0,'0B','NOT CACHED','NOT 
CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
+'2009','9',300,0,'0B','NOT CACHED','NOT 
CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
+'2009','10',310,0,'0B','NOT CACHED','NOT 
CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
+'2009','11',300,0,'0B','NOT CACHED','NOT 
CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
+'2009','12',310,0,'0B','NOT CACHED','NOT 
CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
+'2010','1',310,0,'0B','NOT CACHED','NOT 
CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
+'2010','2',280,0,'0B','NOT CACHED','NOT 
CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
+'2010','3',310,0,'0B','NOT CACHED','NOT 
CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
+'2010','4',300,0,'0B','NOT CACHED','NOT 
CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
+'2010','5',310,0,'0B','NOT CACHED','NOT 
CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
+'2010','6',300,0,'0B','NOT CACHED','NOT 
CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
+'2010','7',310,0,'0B','NOT CACHED','NOT 
CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
+'2010','8',310,0,'0B','NOT CACHED','NOT 
CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
+'2010','9',300,0,'0B','NOT CACHED','NOT 
CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
+'2010','10',310,0,'0B','NOT CACHED','NOT 
CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
+'2010','11',300,0,'0B','NOT CACHED','NOT 
CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
+'2010','12',310,0,'0B','NOT CACHED','NOT 
CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
+'Total','',7300,0,'0B','0B','','','','',''
+---- TYPES
+STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, 
STRING, STRING
+====
+---- QUERY
+show column stats t4;
+---- LABELS
+COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE, #TRUES, #FALSES
+---- RESULTS
+'id','INT',7300,0,4,4,-1,-1
+'bool_col','BOOLEAN',2,0,1,1,3650,3650
+'tinyint_col','TINYINT',10,0,1,1,-1,-1
+'smallint_col','SMALLINT',10,0,2,2,-1,-1
+'int_col','INT',10,0,4,4,-1,-1
+'bigint_col','BIGINT',10,0,8,8,-1,-1
+'float_col','FLOAT',10,0,4,4,-1,-1
+'double_col','DOUBLE',10,0,8,8,-1,-1
+'date_string_col','STRING',736,0,8,8,-1,-1
+'string_col','STRING',10,0,1,1,-1,-1
+'timestamp_col','TIMESTAMP',7300,0,16,16,-1,-1
+'year','INT',2,0,4,4,-1,-1
+'month','INT',12,0,4,4,-1,-1
+---- TYPES
+STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
+====
+---- QUERY
+# Verify that truncate was successful
+select count(*) from t4;
+---- RESULTS
+0
+---- TYPES
+BIGINT
+====
+---- QUERY
+# Unpartitioned table case: Show that if DELETE_STATS_IN_TRUNCATE is
+# unset truncation removes all files
+# but does not delete table and column stats.
+create table t6 like functional.tinytable
+location '$FILESYSTEM_PREFIX/test-warehouse/$DATABASE.db/t6';
+insert into t6 select * from functional.tinytable;
+compute incremental stats t6;
+truncate table t6;
+show table stats t6;
+---- LABELS
+#ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL 
STATS, LOCATION, EC POLICY
+---- RESULTS
+3,0,'0B','NOT CACHED','NOT 
CACHED','TEXT','false',regex:.*,'$ERASURECODE_POLICY'
+---- TYPES
+BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING, STRING
+====
+---- QUERY
+# Show that the truncation did not remove the column stats.
+show column stats t6;
+---- LABELS
+COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE, #TRUES, #FALSES
+---- RESULTS
+'a','STRING',3,0,8,6.666666507720947,-1,-1
+'b','STRING',3,0,7,4,-1,-1
+---- TYPES
+STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
+====
+---- QUERY
+# Verify that truncate was successful
+select count(*) from t6;
+---- RESULTS
+0
+---- TYPES
+BIGINT
+====
diff --git 
a/testdata/workloads/functional-query/queries/QueryTest/truncate-table.test 
b/testdata/workloads/functional-query/queries/QueryTest/truncate-table.test
index 084ed75c1..c8976da29 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/truncate-table.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/truncate-table.test
@@ -199,74 +199,15 @@ create table t4 like functional.alltypes
 location '$FILESYSTEM_PREFIX/test-warehouse/$DATABASE.db/t4';
 insert into t4 partition(year, month) select * from functional.alltypes;
 compute incremental stats t4;
-# if DELETE_STATS_IN_TRUNCATE is unset then truncate should not
-# delete the table statistics.
+# DELETE_STATS_IN_TRUNCATE=false is unsupported unless
+# '--truncate_external_tables_with_hms=false' (for that case, see
+# custom_cluster/test_no_delete_stats_in_truncate.py).
 set DELETE_STATS_IN_TRUNCATE=0;
 truncate table t4;
-show table stats t4;
----- LABELS
-YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, 
INCREMENTAL STATS, LOCATION, EC POLICY
----- RESULTS
-'2009','1',310,0,'0B','NOT CACHED','NOT 
CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
-'2009','2',280,0,'0B','NOT CACHED','NOT 
CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
-'2009','3',310,0,'0B','NOT CACHED','NOT 
CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
-'2009','4',300,0,'0B','NOT CACHED','NOT 
CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
-'2009','5',310,0,'0B','NOT CACHED','NOT 
CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
-'2009','6',300,0,'0B','NOT CACHED','NOT 
CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
-'2009','7',310,0,'0B','NOT CACHED','NOT 
CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
-'2009','8',310,0,'0B','NOT CACHED','NOT 
CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
-'2009','9',300,0,'0B','NOT CACHED','NOT 
CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
-'2009','10',310,0,'0B','NOT CACHED','NOT 
CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
-'2009','11',300,0,'0B','NOT CACHED','NOT 
CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
-'2009','12',310,0,'0B','NOT CACHED','NOT 
CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
-'2010','1',310,0,'0B','NOT CACHED','NOT 
CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
-'2010','2',280,0,'0B','NOT CACHED','NOT 
CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
-'2010','3',310,0,'0B','NOT CACHED','NOT 
CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
-'2010','4',300,0,'0B','NOT CACHED','NOT 
CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
-'2010','5',310,0,'0B','NOT CACHED','NOT 
CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
-'2010','6',300,0,'0B','NOT CACHED','NOT 
CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
-'2010','7',310,0,'0B','NOT CACHED','NOT 
CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
-'2010','8',310,0,'0B','NOT CACHED','NOT 
CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
-'2010','9',300,0,'0B','NOT CACHED','NOT 
CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
-'2010','10',310,0,'0B','NOT CACHED','NOT 
CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
-'2010','11',300,0,'0B','NOT CACHED','NOT 
CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
-'2010','12',310,0,'0B','NOT CACHED','NOT 
CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
-'Total','',7300,0,'0B','0B','','','','',''
----- TYPES
-STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, 
STRING, STRING
-====
----- QUERY
-show column stats t4;
----- LABELS
-COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE, #TRUES, #FALSES
----- RESULTS
-'id','INT',7300,0,4,4,-1,-1
-'bool_col','BOOLEAN',2,0,1,1,3650,3650
-'tinyint_col','TINYINT',10,0,1,1,-1,-1
-'smallint_col','SMALLINT',10,0,2,2,-1,-1
-'int_col','INT',10,0,4,4,-1,-1
-'bigint_col','BIGINT',10,0,8,8,-1,-1
-'float_col','FLOAT',10,0,4,4,-1,-1
-'double_col','DOUBLE',10,0,8,8,-1,-1
-'date_string_col','STRING',736,0,8,8,-1,-1
-'string_col','STRING',10,0,1,1,-1,-1
-'timestamp_col','TIMESTAMP',7300,0,16,16,-1,-1
-'year','INT',2,0,4,4,-1,-1
-'month','INT',12,0,4,4,-1,-1
----- TYPES
-STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
-====
----- QUERY
-#Verify that truncate was successful
-select count(*) from t4;
----- RESULTS
-0
----- TYPES
-BIGINT
+---- CATCH
+ImpalaRuntimeException: Setting the query option 'DELETE_STATS_IN_TRUNCATE' to 
false is not supported when the flag '--truncate_external_tables_with_hms' is 
set to true.
 ====
 ---- QUERY
-insert into t4 partition(year, month) select * from functional.alltypes;
-compute incremental stats t4;
 # if DELETE_STATS_IN_TRUNCATE is set then truncate should
 # delete the statistics.
 set DELETE_STATS_IN_TRUNCATE=1;
@@ -334,47 +275,20 @@ select count(*) from t4;
 BIGINT
 ====
 ---- QUERY
-# Unpartitioned table case: Show that if DELETE_STATS_IN_TRUNCATE is
-# unset truncation removes all files
-# but does not delete table and column stats.
+# Unpartitioned table case:
+# DELETE_STATS_IN_TRUNCATE=false is unsupported unless
+# '--truncate_external_tables_with_hms=false' (for that case, see
+# custom_cluster/test_no_delete_stats_in_truncate.py).
 create table t6 like functional.tinytable
 location '$FILESYSTEM_PREFIX/test-warehouse/$DATABASE.db/t6';
 insert into t6 select * from functional.tinytable;
 compute incremental stats t6;
 set DELETE_STATS_IN_TRUNCATE=0;
 truncate table t6;
-show table stats t6;
----- LABELS
-#ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL 
STATS, LOCATION, EC POLICY
----- RESULTS
-3,0,'0B','NOT CACHED','NOT 
CACHED','TEXT','false',regex:.*,'$ERASURECODE_POLICY'
----- TYPES
-BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING, STRING
-====
----- QUERY
-# Show that the truncation removed the column stats.
-show column stats t6;
----- LABELS
-COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE, #TRUES, #FALSES
----- RESULTS
-'a','STRING',3,0,8,6.666666507720947,-1,-1
-'b','STRING',3,0,7,4,-1,-1
----- TYPES
-STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
-====
----- QUERY
-#Verify that truncate was successful
-select count(*) from t6;
----- RESULTS
-0
----- TYPES
-BIGINT
+---- CATCH
+ImpalaRuntimeException: Setting the query option 'DELETE_STATS_IN_TRUNCATE' to 
false is not supported when the flag '--truncate_external_tables_with_hms' is 
set to true.
 ====
 ---- QUERY
-# Unpartitioned table case: Show that if DELETE_STATS_IN_TRUNCATE is
-# set truncation removes all files and deletes stats.
-insert into t6 select * from functional.tinytable;
-compute incremental stats t6;
 # table stats should be deleted
 set DELETE_STATS_IN_TRUNCATE=1;
 truncate table t6;
diff --git a/tests/custom_cluster/test_events_custom_configs.py 
b/tests/custom_cluster/test_events_custom_configs.py
index bcf86c9fc..a13686700 100644
--- a/tests/custom_cluster/test_events_custom_configs.py
+++ b/tests/custom_cluster/test_events_custom_configs.py
@@ -149,14 +149,7 @@ class 
TestEventProcessingCustomConfigsBase(CustomClusterTestSuite):
         # insert overwrite query from Impala also generates a INSERT self-event
         "insert overwrite table {0}.{1} partition "
         "(year, month) select * from functional.alltypessmall where year=2009 "
-        "and month=1".format(db_name, tbl_name),
-        # events processor doesn't process delete column stats events 
currently,
-        # however, in case of incremental stats, there could be alter table and
-        # alter partition events which should be ignored. Hence we run compute 
stats
-        # before to make sure that the truncate table command generated alter 
events
-        # are ignored.
-        "compute incremental stats {0}.{1}".format(db_name, tbl_name),
-        "{0} {1}.{2}".format(TRUNCATE_TBL_STMT, db_name, tbl_name)],
+        "and month=1".format(db_name, tbl_name)],
       False: [
         "create table {0}.{1} like functional.alltypessmall "
         "stored as parquet".format(db_name, tbl_name),
@@ -223,7 +216,14 @@ class 
TestEventProcessingCustomConfigsBase(CustomClusterTestSuite):
         "insert overwrite {0}.{1} partition(part) select * from 
{0}.{1}".format(
           db_name, acid_tbl_name),
         # recover partitions will generate add_partition events
-        "alter table {0}.{1} recover partitions".format(db_name, 
recover_tbl_name)
+        "alter table {0}.{1} recover partitions".format(db_name, 
recover_tbl_name),
+        # events processor doesn't process delete column stats events 
currently,
+        # however, in case of incremental stats, there could be alter table and
+        # alter partition events which should be ignored. Hence we run compute 
stats
+        # before to make sure that the truncate table command generated alter 
events
+        # are ignored.
+        "compute incremental stats {0}.{1}".format(db_name, tbl_name),
+        "{0} {1}.{2}".format(TRUNCATE_TBL_STMT, db_name, tbl_name)
       ]
     }
     return self_event_test_queries
@@ -337,8 +337,10 @@ class 
TestEventProcessingCustomConfigsBase(CustomClusterTestSuite):
       if (TRUNCATE_TBL_STMT not in stmt):
         assert tbls_refreshed == tbls_refreshed_after, \
           "Failing query(impala={}): {}".format(use_impala_client, stmt)
-      assert partitions_refreshed == partitions_refreshed_after, \
-        "Failing query(impala={}): {}".format(use_impala_client, stmt)
+      # TRUNCATE refreshes partitions
+      if (TRUNCATE_TBL_STMT not in stmt):
+        assert partitions_refreshed == partitions_refreshed_after, \
+          "Failing query(impala={}): {}".format(use_impala_client, stmt)
     else:
       # hive was used to run the stmts, any events generated should not have 
been deemed
       # as self events unless there are empty partition add/drop events
diff --git a/tests/custom_cluster/test_no_delete_stats_in_truncate.py 
b/tests/custom_cluster/test_no_delete_stats_in_truncate.py
new file mode 100644
index 000000000..c7b922d4b
--- /dev/null
+++ b/tests/custom_cluster/test_no_delete_stats_in_truncate.py
@@ -0,0 +1,40 @@
+#!/usr/bin/env impala-python
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from __future__ import absolute_import, division, print_function
+
+import pytest
+
+from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
+
+
[email protected]_args(
+  catalogd_args="--truncate_external_tables_with_hms=false",
+  cluster_size=1)
+class TestNoDeleteStatsInTruncate(CustomClusterTestSuite):
+  @classmethod
+  def setup_class(cls):
+    if cls.exploration_strategy() != 'exhaustive':
+      pytest.skip('runs only in exhaustive')
+    super(TestNoDeleteStatsInTruncate, cls).setup_class()
+
+  def test_stats_remain_after_truncate(self, unique_database, vector):
+    vector.get_value('exec_option')['delete_stats_in_truncate'] = False
+    self.run_test_case('QueryTest/truncate-table-no-delete-stats', vector,
+        use_db=unique_database)
diff --git a/tests/metadata/test_ddl.py b/tests/metadata/test_ddl.py
index 867fe6528..de9753014 100644
--- a/tests/metadata/test_ddl.py
+++ b/tests/metadata/test_ddl.py
@@ -177,10 +177,10 @@ class TestDdlStatements(TestDdlBase):
       assert len(self.filesystem_client.ls(
           "{1}/{0}.db/t1/".format(unique_database, WAREHOUSE))) == 2
 
-      # Truncating the table removes the data files and preserves the table's 
directory
+      # Truncating the table removes the data files and the staging directory
       self.client.execute("truncate table {0}.t1".format(unique_database))
       assert len(self.filesystem_client.ls(
-          "{1}/{0}.db/t1/".format(unique_database, WAREHOUSE))) == 1
+          "{1}/{0}.db/t1/".format(unique_database, WAREHOUSE))) == 0
 
       self.client.execute(
           "create table {0}.t2(i int) partitioned by (p 
int)".format(unique_database))
diff --git a/tests/metadata/test_recursive_listing.py 
b/tests/metadata/test_recursive_listing.py
index 8b00cc273..9e064a4ad 100644
--- a/tests/metadata/test_recursive_listing.py
+++ b/tests/metadata/test_recursive_listing.py
@@ -40,9 +40,6 @@ class TestRecursiveListing(ImpalaTestSuite):
     cls.ImpalaTestMatrix.clear_dimension('exec_option')
     cls.ImpalaTestMatrix.add_dimension(
         create_uncompressed_text_dimension(cls.get_workload()))
-    cls.ImpalaTestMatrix.add_constraint(lambda v:
-        (v.get_value('table_format').file_format == 'text'
-         and v.get_value('table_format').compression_codec == 'none'))
 
   def _show_files(self, table):
     files = self.client.execute("show files in {0}".format(table))
@@ -142,6 +139,18 @@ class TestRecursiveListing(ImpalaTestSuite):
     assert len(self._show_files(fq_tbl_name)) == 1
     assert len(self._get_rows(fq_tbl_name)) == 1
 
+    # Verify that TRUNCATE removes data files in subdirectories too.
+    # Regression test for IMPALA-13778.
+    self.filesystem_client.make_dir("{0}/dir1".format(part_path))
+    self.filesystem_client.create_file("{0}/dir1/file1.txt".format(part_path), 
"file1")
+    self.execute_query_expect_success(self.client, "refresh 
{0}".format(fq_tbl_name))
+    assert len(self._show_files(fq_tbl_name)) == 2
+    assert len(self._get_rows(fq_tbl_name)) == 2
+
+    self.execute_query_expect_success(self.client, "truncate 
{0}".format(fq_tbl_name))
+    assert len(self._show_files(fq_tbl_name)) == 0
+    assert len(self._get_rows(fq_tbl_name)) == 0
+
   @SkipIfFS.no_partial_listing
   @pytest.mark.execute_serially
   def test_large_staging_dirs(self, unique_database):

Reply via email to