This is an automated email from the ASF dual-hosted git repository. stigahuang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit de0e417d3e33348c35555c7d4133a466476880b0 Author: stiga-huang <[email protected]> AuthorDate: Tue Dec 30 23:30:45 2025 +0800 IMPALA-14646: StorageDescriptor normalization should deal with parameters When checking whether an ALTER_TABLE event has trivial changes in StorageDescriptor, we normalize fields that are unrelated to file metadata, e.g. cols, bucketCols, sortCols, etc. However, the parameters map of StorageDescriptor is not normalized, which causes null and empty map be considered as different. This patch adds the normalization on the parameters map of StorageDescriptor. Also improves the logs when non-trival SD changes is detected. Currently we just dump the full SD objects, which is pretty verbose and hard to analyze. This patches add logs to show the actual changes. Tests - Added FE test Change-Id: I6a9fcf2d60a41e9669d49412d49a2416c13d17bc Reviewed-on: http://gerrit.cloudera.org:8080/23814 Reviewed-by: Impala Public Jenkins <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- .../impala/catalog/events/MetastoreEvents.java | 57 +++++++++++++++++++--- .../events/MetastoreEventsProcessorTest.java | 17 +++++++ 2 files changed, 67 insertions(+), 7 deletions(-) diff --git a/fe/src/main/java/org/apache/impala/catalog/events/MetastoreEvents.java b/fe/src/main/java/org/apache/impala/catalog/events/MetastoreEvents.java index bb9a447ce..23b78500b 100644 --- a/fe/src/main/java/org/apache/impala/catalog/events/MetastoreEvents.java +++ b/fe/src/main/java/org/apache/impala/catalog/events/MetastoreEvents.java @@ -2315,15 +2315,55 @@ public class MetastoreEvents { Preconditions.checkNotNull(afterSd, "afterSd is null"); StorageDescriptor previousSD = normalizeStorageDescriptor(beforeSd.deepCopy()); StorageDescriptor currentSD = normalizeStorageDescriptor(afterSd.deepCopy()); - if (!Objects.equals(previousSD, currentSD)) { + if (Objects.equals(previousSD, currentSD)) { + infoLog("Ignored trivial changes in table storage descriptor (SD) of table " + + "{}.{}.", dbName_, tblName_); + return true; + } + boolean foundChange = false; + if (!Objects.equals(previousSD.getLocation(), currentSD.getLocation())) { + foundChange = true; + infoLog("Location changed from '{}' to '{}'", + previousSD.getLocation(), currentSD.getLocation()); + } + if (!Objects.equals(previousSD.getInputFormat(), currentSD.getInputFormat())) { + foundChange = true; + infoLog("InputFormat changed from '{}' to '{}'", + previousSD.getInputFormat(), currentSD.getInputFormat()); + } + if (!Objects.equals(previousSD.getOutputFormat(), currentSD.getOutputFormat())) { + foundChange = true; + infoLog("OutputFormat changed from '{}' to '{}'", + previousSD.getOutputFormat(), currentSD.getOutputFormat()); + } + if (!Objects.equals(previousSD.getSerdeInfo(), currentSD.getSerdeInfo())) { + foundChange = true; + infoLog("SerdeInfo changed from '{}' to '{}'", + previousSD.getSerdeInfo(), currentSD.getSerdeInfo()); + } + // normalizeStorageDescriptor() makes sure storedAsSubDirectories is set. + if (previousSD.isStoredAsSubDirectories() != currentSD.isStoredAsSubDirectories()) { + foundChange = true; + infoLog("StoredAsSubDirectories changed from '{}' to '{}'", + previousSD.isStoredAsSubDirectories(), currentSD.isStoredAsSubDirectories()); + } + if (!Objects.equals(previousSD.getParameters(), currentSD.getParameters())) { + foundChange = true; + infoLog("Parameters changed from '{}' to '{}'", + previousSD.getParameters(), currentSD.getParameters()); + } + if (foundChange) { infoLog("Non-trivial change in table storage descriptor (SD) detected for " + - "table {}.{}. So file metadata should be reloaded. SD before: {}, SD " + - "after: {}", dbName_, tblName_, beforeSd.toString(), afterSd.toString()); - return false; + "table {}.{}. So file metadata should be reloaded.", dbName_, tblName_); + } else { + // New hive versions might add new fields to StorageDescriptor that we don't + // normalize or miss in above checks. So we log the full SD objects here. + infoLog("Non-trivial change in table storage descriptor (SD) detected for " + + "table {}.{}. So file metadata should be reloaded. SD before: " + + "{}, SD after: {}", + dbName_, tblName_, beforeSd.toString(), afterSd.toString()); } - infoLog("Trivial changes in table storage descriptor (SD) detected for table " + - "{}.{}. So file metadata reload can be skipped.", dbName_, tblName_); - return true; + return false; } /** @@ -2343,6 +2383,9 @@ public class MetastoreEvents { if (!sd.isSetStoredAsSubDirectories()) { sd.setStoredAsSubDirectories(false); } + if (sd.isSetParameters() && sd.getParameters().isEmpty()) { + sd.unsetParameters(); + } return sd; } diff --git a/fe/src/test/java/org/apache/impala/catalog/events/MetastoreEventsProcessorTest.java b/fe/src/test/java/org/apache/impala/catalog/events/MetastoreEventsProcessorTest.java index eaed60b45..7644cab10 100644 --- a/fe/src/test/java/org/apache/impala/catalog/events/MetastoreEventsProcessorTest.java +++ b/fe/src/test/java/org/apache/impala/catalog/events/MetastoreEventsProcessorTest.java @@ -3221,6 +3221,23 @@ public class MetastoreEventsProcessorTest { fileMetadataLoadBefore = fileMetadataLoadAfter; fileMetadataLoadAfter = processAlterTableAndReturnMetric(testTblName, hmsTbl); assertEquals(fileMetadataLoadBefore + 1, fileMetadataLoadAfter); + + // Test 10: Trivial change in SD parameters + LOG.info("Test SD parameters change from unset to empty"); + hmsTbl = tbl.getMetaStoreTable().deepCopy(); + hmsTbl.getSd().unsetParameters(); + fileMetadataLoadBefore = processAlterTableAndReturnMetric(testTblName, hmsTbl); + // From unset to empty + hmsTbl.getSd().setParameters(new HashMap<>()); + fileMetadataLoadAfter = processAlterTableAndReturnMetric(testTblName, hmsTbl); + assertEquals("SD parameters changed from unset to empty should not trigger reload", + fileMetadataLoadBefore, fileMetadataLoadAfter); + // From empty to unset + fileMetadataLoadBefore = fileMetadataLoadAfter; + hmsTbl.getSd().unsetParameters(); + fileMetadataLoadAfter = processAlterTableAndReturnMetric(testTblName, hmsTbl); + assertEquals("SD parameters changed from empty to unset should not trigger reload", + fileMetadataLoadBefore, fileMetadataLoadAfter); } private long processAlterTableAndReturnMetric(String testTblName,
