This is an automated email from the ASF dual-hosted git repository. joemcdonnell pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit e1896d4bf8d9568c277e99b664fa9abe8d4f6271 Author: Joe McDonnell <[email protected]> AuthorDate: Mon Sep 22 17:33:56 2025 -0700 IMPALA-14258: Disable tuple caching for Full Hive ACID tables TestAcidRowValidation.test_row_validation fails with tuple caching correctness verification. The test creates a Full Hive ACID table with a file using valid write ids, mimicking a streaming ingest. As the valid write ids change, the scan of that file produces different rows without the file changing. Tuple caching currently doesn't understand valid write ids, so this produces incorrect results. This marks Full Hive ACID tables as ineligible for caching until valid write ids can be supported properly. Insert-only tables are still eligible. Testing: - Added test cases to TupleCacheTest - Ran TestAcidRowValidation.test_row_validation with correctness verification Change-Id: Icab9613b8e2973aed1d34427c51d2fd8b37a9aba Reviewed-on: http://gerrit.cloudera.org:8080/23454 Reviewed-by: Yida Wu <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> Reviewed-by: Michael Smith <[email protected]> --- fe/src/main/java/org/apache/impala/planner/TupleCacheInfo.java | 10 ++++++++++ fe/src/test/java/org/apache/impala/planner/TupleCacheTest.java | 8 ++++++++ 2 files changed, 18 insertions(+) diff --git a/fe/src/main/java/org/apache/impala/planner/TupleCacheInfo.java b/fe/src/main/java/org/apache/impala/planner/TupleCacheInfo.java index bcaf81d93..0f8ee124b 100644 --- a/fe/src/main/java/org/apache/impala/planner/TupleCacheInfo.java +++ b/fe/src/main/java/org/apache/impala/planner/TupleCacheInfo.java @@ -45,6 +45,7 @@ import org.apache.impala.thrift.TScanRangeSpec; import org.apache.impala.thrift.TSlotDescriptor; import org.apache.impala.thrift.TTableName; import org.apache.impala.thrift.TTupleDescriptor; +import org.apache.impala.util.AcidUtils; import org.apache.thrift.TBase; import org.apache.thrift.TSerializer; import org.apache.thrift.protocol.TBinaryProtocol; @@ -107,6 
+108,7 @@ public class TupleCacheInfo { NONDETERMINISTIC_FN, MERGING_EXCHANGE, PARTITIONED_EXCHANGE, + FULL_ACID, } private EnumSet<IneligibilityReason> ineligibilityReasons_; @@ -532,6 +534,14 @@ public class TupleCacheInfo { "registerTable() only applies to base tables"); Preconditions.checkState(tbl != null, "Invalid null argument to registerTable()"); + // IMPALA-14258: Tuple caching does not support Full Hive ACID tables, as it does + // not yet support handling valid write ids. + if (tbl.getMetaStoreTable() != null && + AcidUtils.isFullAcidTable(tbl.getMetaStoreTable().getParameters())) { + setIneligible(IneligibilityReason.FULL_ACID); + return; + } + // Right now, we only hash the database / table name. TTableName tblName = tbl.getTableName().toThrift(); hashThrift("Table", tblName); diff --git a/fe/src/test/java/org/apache/impala/planner/TupleCacheTest.java b/fe/src/test/java/org/apache/impala/planner/TupleCacheTest.java index 7b6d3192b..516168720 100644 --- a/fe/src/test/java/org/apache/impala/planner/TupleCacheTest.java +++ b/fe/src/test/java/org/apache/impala/planner/TupleCacheTest.java @@ -252,6 +252,14 @@ public class TupleCacheTest extends PlannerTestBase { verifyCacheIneligible("select id from functional_kudu.alltypes"); verifyCacheIneligible("select id from functional_hbase.alltypes"); + // Caching for Full Hive ACID is not implemented due to complications + // with valid write ids. ORC tables are loaded as Full ACID tables. + verifyCacheIneligible("select count(*) from functional_orc_def.alltypes"); + // Hive ACID insert-only tables are eligible + verifyAllEligible( + "select count(*) from functional_parquet.insert_only_major_and_minor_compacted", + /* isDistributedPlan */ false); + // Runtime filter produced by Kudu table is not implemented verifyCacheIneligible("select a.id from functional.alltypes a, " + "functional_kudu.alltypes b where a.id = b.id");
