This is an automated email from the ASF dual-hosted git repository.
pwason pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new abb5fd2ad65d feat: add support for touch partitions in HiveSyncTool (#18064)
abb5fd2ad65d is described below
commit abb5fd2ad65d1e39afc9f3b5e324b54c8f6de957
Author: Nada <[email protected]>
AuthorDate: Tue Mar 10 13:48:06 2026 -0400
feat: add support for touch partitions in HiveSyncTool (#18064)
* Add touch partition event for HMS registration
* Fix compilation errors in hudi-hive-sync module
* test: Remove deprecated HMS sync mode test
* Revert HMS sync mode deprecation
* test: Update TestHiveSyncTool to expect TOUCH partition events
* Only sync touch partitions when conditional sync is enabled
* Update documentation for touchPartitionsToTable method
* Refactor partition alter logic in QueryBasedDDLExecutor
* Fix TOUCH event generation to respect conditional sync setting
* Minor formatting cleanup in TestHiveSyncTool
* Minor whitespace cleanup in TestHiveSyncTool
* Add config to guard TOUCH partition events during meta sync
* Use dedicated config flag for touch partition operations
* Update TOUCH partition event documentation and simplify code
---
.../java/org/apache/hudi/hive/HiveSyncTool.java | 10 ++-
.../org/apache/hudi/hive/HoodieHiveSyncClient.java | 19 +++++
.../java/org/apache/hudi/hive/ddl/DDLExecutor.java | 10 +++
.../org/apache/hudi/hive/ddl/HMSDDLExecutor.java | 46 ++++++++----
.../hudi/hive/ddl/QueryBasedDDLExecutor.java | 64 ++++++++++++----
.../org/apache/hudi/hive/TestHiveSyncTool.java | 86 +++++++++++++++++++++-
.../hudi/sync/common/HoodieMetaSyncOperations.java | 12 +++
.../apache/hudi/sync/common/HoodieSyncClient.java | 4 +
.../apache/hudi/sync/common/HoodieSyncConfig.java | 8 ++
.../hudi/sync/common/model/PartitionEvent.java | 6 +-
10 files changed, 229 insertions(+), 36 deletions(-)
diff --git a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncTool.java b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncTool.java
index d91d4d746ce0..8c827d4dc1fc 100644
--- a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncTool.java
+++ b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncTool.java
@@ -43,6 +43,7 @@ import lombok.extern.slf4j.Slf4j;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
+import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Objects;
@@ -72,6 +73,7 @@ import static
org.apache.hudi.hive.util.HiveSchemaUtil.getSchemaDifference;
import static
org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_BASE_FILE_FORMAT;
import static org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_BASE_PATH;
import static
org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_CONDITIONAL_SYNC;
+import static
org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_TOUCH_PARTITIONS_ENABLED;
import static
org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_INCREMENTAL;
import static
org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_PARTITION_FIELDS;
import static
org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_SNAPSHOT_WITH_TABLE_NAME;
@@ -550,7 +552,13 @@ public class HiveSyncTool extends HoodieSyncTool
implements AutoCloseable {
syncClient.dropPartitions(tableName, dropPartitions);
}
- return !updatePartitions.isEmpty() || !newPartitions.isEmpty() ||
!dropPartitions.isEmpty();
+ List<String> touchPartitions =
config.getBoolean(META_SYNC_TOUCH_PARTITIONS_ENABLED) ?
filterPartitions(partitionEventList, PartitionEventType.TOUCH) :
Collections.emptyList();
+ if (!touchPartitions.isEmpty()) {
+ log.info("Touch Partitions " + touchPartitions);
+ syncClient.touchPartitionsToTable(tableName, touchPartitions);
+ }
+
+ return !updatePartitions.isEmpty() || !newPartitions.isEmpty() ||
!dropPartitions.isEmpty() || !touchPartitions.isEmpty();
}
private List<String> filterPartitions(List<PartitionEvent> events,
PartitionEventType eventType) {
diff --git a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HoodieHiveSyncClient.java b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HoodieHiveSyncClient.java
index f8dc69285aab..2935e433365b 100644
--- a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HoodieHiveSyncClient.java
+++ b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HoodieHiveSyncClient.java
@@ -18,6 +18,7 @@
package org.apache.hudi.hive;
+import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hudi.common.model.HoodieFileFormat;
import org.apache.hudi.common.schema.HoodieSchema;
import org.apache.hudi.common.table.HoodieTableMetaClient;
@@ -53,6 +54,8 @@ import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
+import java.util.Properties;
+import java.util.Set;
import java.util.stream.Collectors;
import static
org.apache.hudi.hadoop.utils.HoodieHiveUtils.GLOBALLY_CONSISTENT_READ_TIMESTAMP;
@@ -88,6 +91,7 @@ public class HoodieHiveSyncClient extends HoodieSyncClient {
// disable jdbc and depend on metastore client for all hive registrations
try {
this.client = IMetaStoreClientUtil.getMSC(config.getHiveConf());
+ setMetaConf(config.getHiveConf());
if (!StringUtils.isNullOrEmpty(config.getString(HIVE_SYNC_MODE))) {
HiveSyncMode syncMode =
HiveSyncMode.of(config.getString(HIVE_SYNC_MODE));
switch (syncMode) {
@@ -131,6 +135,11 @@ public class HoodieHiveSyncClient extends HoodieSyncClient
{
ddlExecutor.updatePartitionsToTable(tableName, changedPartitions);
}
+ @Override
+ public void touchPartitionsToTable(String tableName, List<String>
touchPartitions) {
+ ddlExecutor.touchPartitionsToTable(tableName, touchPartitions);
+ }
+
@Override
public void dropPartitions(String tableName, List<String> partitionsToDrop) {
ddlExecutor.dropPartitionsToTable(tableName, partitionsToDrop);
@@ -500,4 +509,14 @@ public class HoodieHiveSyncClient extends HoodieSyncClient
{
throw new HoodieHiveSyncException("Failed to get the basepath of the
table " + tableId(databaseName, tableName), e);
}
}
+
+ private void setMetaConf(HiveConf configuration) throws TException {
+ Properties confProperties = configuration.getAllProperties();
+ Set<String> confPropertyNames = confProperties.stringPropertyNames();
+ for (String propertyName : confPropertyNames) {
+ if (propertyName.startsWith("hive.metastore.callerContext")) {
+ this.client.setMetaConf(propertyName,
confProperties.getProperty(propertyName));
+ }
+ }
+ }
}
diff --git a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/ddl/DDLExecutor.java b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/ddl/DDLExecutor.java
index 324b0f305056..d1e0e03aca3f 100644
--- a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/ddl/DDLExecutor.java
+++ b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/ddl/DDLExecutor.java
@@ -83,6 +83,16 @@ public interface DDLExecutor extends AutoCloseable {
*/
void updatePartitionsToTable(String tableName, List<String>
changedPartitions);
+ /**
+ * Touches partitions for a given table. Updates partition metadata (e.g.
last modified time)
+ * in the metastore for partitions that had new data written but no schema
or location change.
+ * Only invoked when {@code META_SYNC_TOUCH_PARTITIONS_ENABLED} is true.
+ *
+ * @param tableName The table name in the metastore.
+ * @param touchPartitions List of partition paths (storage format) to touch.
+ */
+ void touchPartitionsToTable(String tableName, List<String> touchPartitions);
+
/**
* Drop partitions for a given table.
*
diff --git a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/ddl/HMSDDLExecutor.java b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/ddl/HMSDDLExecutor.java
index 81ba15660f39..6b7bd3af0096 100644
--- a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/ddl/HMSDDLExecutor.java
+++ b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/ddl/HMSDDLExecutor.java
@@ -225,23 +225,17 @@ public class HMSDDLExecutor implements DDLExecutor {
return;
}
log.info("Changing partitions {} on {}", changedPartitions.size(),
tableName);
- try {
- StorageDescriptor sd = client.getTable(databaseName, tableName).getSd();
- List<Partition> partitionList = changedPartitions.stream().map(partition
-> {
- Path partitionPath =
HadoopFSUtils.constructAbsolutePathInHadoopPath(syncConfig.getString(META_SYNC_BASE_PATH),
partition);
- String partitionScheme = partitionPath.toUri().getScheme();
- String fullPartitionPath =
StorageSchemes.HDFS.getScheme().equals(partitionScheme)
- ?
HadoopFSUtils.getDFSFullPartitionPath(syncConfig.getHadoopFileSystem(),
partitionPath) : partitionPath.toString();
- List<String> partitionValues =
partitionValueExtractor.extractPartitionValuesInPath(partition);
- StorageDescriptor partitionSd = sd.deepCopy();
- partitionSd.setLocation(fullPartitionPath);
- return new Partition(partitionValues, databaseName, tableName, 0, 0,
partitionSd, null);
- }).collect(Collectors.toList());
- client.alter_partitions(databaseName, tableName, partitionList, null);
- } catch (TException e) {
- log.error("{}.{} update partition failed", databaseName, tableName, e);
- throw new HoodieHiveSyncException(databaseName + "." + tableName + "
update partition failed", e);
+ registerAlterPartitionEvent(tableName, changedPartitions);
+ }
+
+ @Override
+ public void touchPartitionsToTable(String tableName, List<String>
touchedPartitions) {
+ if (touchedPartitions.isEmpty()) {
+ log.info("No partitions to touch for {}", tableName);
+ return;
}
+ log.info("Touching partitions {} on {}", touchedPartitions.size(),
tableName);
+ registerAlterPartitionEvent(tableName, touchedPartitions);
}
@Override
@@ -294,4 +288,24 @@ public class HMSDDLExecutor implements DDLExecutor {
Hive.closeCurrent();
}
}
+
+ private void registerAlterPartitionEvent(String tableName, List<String>
alteredPartitions) {
+ try {
+ StorageDescriptor sd = client.getTable(databaseName, tableName).getSd();
+ List<Partition> partitionList = alteredPartitions.stream().map(partition
-> {
+ Path partitionPath =
HadoopFSUtils.constructAbsolutePathInHadoopPath(syncConfig.getString(META_SYNC_BASE_PATH),
partition);
+ String partitionScheme = partitionPath.toUri().getScheme();
+ String fullPartitionPath =
StorageSchemes.HDFS.getScheme().equals(partitionScheme)
+ ?
HadoopFSUtils.getDFSFullPartitionPath(syncConfig.getHadoopFileSystem(),
partitionPath) : partitionPath.toString();
+ List<String> partitionValues =
partitionValueExtractor.extractPartitionValuesInPath(partition);
+ StorageDescriptor partitionSd = sd.deepCopy();
+ partitionSd.setLocation(fullPartitionPath);
+ return new Partition(partitionValues, databaseName, tableName, 0, 0,
partitionSd, null);
+ }).collect(Collectors.toList());
+ client.alter_partitions(databaseName, tableName, partitionList, null);
+ } catch (TException e) {
+ log.error("{}.{} update partition failed", databaseName, tableName, e);
+ throw new HoodieHiveSyncException(databaseName + "." + tableName + "
update partition failed", e);
+ }
+ }
}
diff --git a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/ddl/QueryBasedDDLExecutor.java b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/ddl/QueryBasedDDLExecutor.java
index 472bcedd328a..7f776f2f7a04 100644
--- a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/ddl/QueryBasedDDLExecutor.java
+++ b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/ddl/QueryBasedDDLExecutor.java
@@ -130,7 +130,7 @@ public abstract class QueryBasedDDLExecutor implements
DDLExecutor {
return;
}
log.info("Changing partitions {} on {}", changedPartitions.size(),
tableName);
- List<String> sqls = constructChangePartitions(tableName,
changedPartitions);
+ List<String> sqls = constructPartitionAlterStatements(tableName,
changedPartitions, PartitionAlterType.SET_LOCATION);
for (String sql : sqls) {
runSQL(sql);
}
@@ -202,23 +202,57 @@ public abstract class QueryBasedDDLExecutor implements
DDLExecutor {
return String.join(",", partBuilder);
}
- private List<String> constructChangePartitions(String tableName,
List<String> partitions) {
- List<String> changePartitions = new ArrayList<>();
+ @Override
+ public void touchPartitionsToTable(String tableName, List<String>
touchPartitions) {
+ if (touchPartitions.isEmpty()) {
+ log.info("No partitions to touch for " + tableName);
+ return;
+ }
+ log.info("Touching partitions " + touchPartitions.size() + " on " +
tableName);
+ List<String> sqls = constructPartitionAlterStatements(tableName,
touchPartitions, PartitionAlterType.TOUCH);
+ for (String sql : sqls) {
+ runSQL(sql);
+ }
+ }
+
+ /**
+ * Builds SQL statements to either touch partitions or set their location.
+ * TOUCH: one ALTER TABLE ... TOUCH PARTITION (p1) PARTITION (p2) ...
+ * SET_LOCATION: one ALTER TABLE ... PARTITION (p) SET LOCATION '...' per
partition.
+ */
+ private List<String> constructPartitionAlterStatements(String tableName,
List<String> partitions, PartitionAlterType alterType) {
+ List<String> result = new ArrayList<>();
// Hive 2.x doesn't like db.table name for operations, hence we need to
change to using the database first
String useDatabase = "USE " + HIVE_ESCAPE_CHARACTER + databaseName +
HIVE_ESCAPE_CHARACTER;
- changePartitions.add(useDatabase);
- String alterTable = "ALTER TABLE " + HIVE_ESCAPE_CHARACTER + tableName +
HIVE_ESCAPE_CHARACTER;
- for (String partition : partitions) {
- String partitionClause = getPartitionClause(partition);
- Path partitionPath =
HadoopFSUtils.constructAbsolutePathInHadoopPath(config.getString(META_SYNC_BASE_PATH),
partition);
- String partitionScheme = partitionPath.toUri().getScheme();
- String fullPartitionPath =
StorageSchemes.HDFS.getScheme().equals(partitionScheme)
- ?
HadoopFSUtils.getDFSFullPartitionPath(config.getHadoopFileSystem(),
partitionPath) : partitionPath.toString();
- String changePartition =
- alterTable + " PARTITION (" + partitionClause + ") SET LOCATION '" +
fullPartitionPath + "'";
- changePartitions.add(changePartition);
+ result.add(useDatabase);
+ String alterTablePrefix = "ALTER TABLE " + HIVE_ESCAPE_CHARACTER +
tableName + HIVE_ESCAPE_CHARACTER;
+ switch (alterType) {
+ case TOUCH:
+ String alterTable = alterTablePrefix + " TOUCH";
+ for (String partition : partitions) {
+ alterTable += " PARTITION (" + getPartitionClause(partition) + ")";
+ }
+ result.add(alterTable);
+ break;
+ case SET_LOCATION:
+ for (String partition : partitions) {
+ String partitionClause = getPartitionClause(partition);
+ Path partitionPath =
HadoopFSUtils.constructAbsolutePathInHadoopPath(config.getString(META_SYNC_BASE_PATH),
partition);
+ String partitionScheme = partitionPath.toUri().getScheme();
+ String fullPartitionPath =
StorageSchemes.HDFS.getScheme().equals(partitionScheme)
+ ?
HadoopFSUtils.getDFSFullPartitionPath(config.getHadoopFileSystem(),
partitionPath) : partitionPath.toString();
+ result.add(alterTablePrefix + " PARTITION (" + partitionClause + ")
SET LOCATION '" + fullPartitionPath + "'");
+ }
+ break;
+ default:
+ throw new HoodieHiveSyncException("Partition alter type not supported:
" + alterType);
}
- return changePartitions;
+ return result;
+ }
+
+ private enum PartitionAlterType {
+ TOUCH,
+ SET_LOCATION
}
}
diff --git a/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/TestHiveSyncTool.java b/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/TestHiveSyncTool.java
index 740475af97fa..01ff1daea6da 100644
--- a/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/TestHiveSyncTool.java
+++ b/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/TestHiveSyncTool.java
@@ -116,6 +116,7 @@ import static
org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_INCREMENTAL
import static
org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_PARTITION_EXTRACTOR_CLASS;
import static
org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_PARTITION_FIELDS;
import static
org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_TABLE_NAME;
+import static
org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_TOUCH_PARTITIONS_ENABLED;
import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
@@ -167,6 +168,16 @@ public class TestHiveSyncTool {
return opts;
}
+ // syncMode, touchPartitionsEnabled
+ private static Iterable<Object[]> syncModeAndTouchPartitionsEnabled() {
+ List<Object[]> opts = new ArrayList<>();
+ for (Object mode : SYNC_MODES) {
+ opts.add(new Object[] {mode, true});
+ opts.add(new Object[] {mode, false});
+ }
+ return opts;
+ }
+
private HiveSyncTool hiveSyncTool;
private HoodieHiveSyncClient hiveClient;
@@ -375,9 +386,13 @@ public class TestHiveSyncTool {
hivePartitions = hiveClient.getAllPartitions(HiveTestUtil.TABLE_NAME);
List<String> writtenPartitionsSince =
hiveClient.getWrittenPartitionsSince(Option.empty(), Option.empty());
List<PartitionEvent> partitionEvents =
hiveClient.getPartitionEvents(hivePartitions, writtenPartitionsSince,
Collections.emptySet());
- assertEquals(1, partitionEvents.size(), "There should be only one
partition event");
- assertEquals(PartitionEventType.UPDATE,
partitionEvents.iterator().next().eventType,
- "The one partition event must of type UPDATE");
+ assertEquals(1, partitionEvents.stream()
+ .filter(partitionEvent ->
partitionEvent.eventType.equals(PartitionEventType.UPDATE)).count(),
+ "There should be only one update partition event");
+ // TOUCH events are not produced when META_SYNC_TOUCH_PARTITIONS_ENABLED
is disabled (default)
+ assertEquals(0, partitionEvents.stream()
+ .filter(partitionEvent ->
partitionEvent.eventType.equals(PartitionEventType.TOUCH)).count(),
+ "There should be zero touch partition events when touch partitions is
disabled");
// Add a partition that does not belong to the table, i.e., not in the
same base path
// This should not happen in production. However, if this happens, when
doing fallback
@@ -1464,6 +1479,71 @@ public class TestHiveSyncTool {
"The last commit that was synced should be updated in the
TBLPROPERTIES");
}
+ @ParameterizedTest
+ @MethodSource("syncModeAndTouchPartitionsEnabled")
+ public void testTouchPartition(String syncMode, boolean
touchPartitionsEnabled) throws Exception {
+ hiveSyncProps.setProperty(HIVE_SYNC_MODE.key(), syncMode);
+ hiveSyncProps.setProperty(META_SYNC_TOUCH_PARTITIONS_ENABLED.key(),
String.valueOf(touchPartitionsEnabled));
+ String instantTime = "100";
+ HiveTestUtil.createCOWTable(instantTime, 5, true);
+
+ HiveTestUtil.getCreatedTablesSet().add(HiveTestUtil.DB_NAME + "." +
HiveTestUtil.TABLE_NAME);
+ reInitHiveSyncClient();
+ assertFalse(hiveClient.tableExists(HiveTestUtil.TABLE_NAME),
+ "Table " + HiveTestUtil.TABLE_NAME + " should not exist
initially");
+ hiveSyncTool.syncHoodieTable();
+
+ // Reinitialize client after sync as the previous client was closed
+ reInitHiveSyncClient();
+ Option<String> lastCommitTimeSynced =
hiveClient.getLastCommitTimeSynced(HiveTestUtil.TABLE_NAME);
+ Option<String> lastCommitCompletionTimeSynced =
hiveClient.getLastCommitCompletionTimeSynced(HiveTestUtil.TABLE_NAME);
+ assertTrue(hiveClient.tableExists(HiveTestUtil.TABLE_NAME),
+ "Table " + HiveTestUtil.TABLE_NAME + " should exist after sync
completes");
+ assertEquals(hiveClient.getMetastoreSchema(HiveTestUtil.TABLE_NAME).size(),
+ hiveClient.getStorageSchema().getFields().size() +
getPartitionFieldSize(),
+ "Hive Schema should match the table schema + partition field");
+
+ List<Partition> partitions =
hiveClient.getAllPartitions(HiveTestUtil.TABLE_NAME);
+ String partitionToTouch = getRelativePartitionPath(new
Path(HiveTestUtil.basePath),
+ new Path(partitions.get(0).getStorageLocation()));
+
+ assertEquals(5, partitions.size(),
+ "Table partitions should match the number of partitions we wrote");
+ assertEquals(instantTime,
hiveClient.getLastCommitTimeSynced(HiveTestUtil.TABLE_NAME).get(),
+ "The last commit that was synced should be updated in the
TBLPROPERTIES");
+
+ // insert into existing partition (creates a touch event)
+ HiveTestUtil.addCOWPartition(partitionToTouch, true, true, "101");
+
+ // sync touch partition event
+ reInitHiveSyncClient();
+ hiveSyncTool.syncHoodieTable();
+
+ // Reinitialize client after sync as the previous client was closed
+ reInitHiveSyncClient();
+ List<Partition> hivePartitions =
hiveClient.getAllPartitions(HiveTestUtil.TABLE_NAME);
+ List<String> writtenPartitionsSince =
hiveClient.getWrittenPartitionsSince(lastCommitTimeSynced,
lastCommitCompletionTimeSynced);
+ List<PartitionEvent> partitionEvents =
hiveClient.getPartitionEvents(hivePartitions, writtenPartitionsSince,
Collections.emptySet());
+ List<String> touchPartitionEvents = partitionEvents.stream()
+ .filter(s -> s.eventType == PartitionEventType.TOUCH)
+ .map(s -> s.storagePartition)
+ .collect(Collectors.toList());
+
+ if (touchPartitionsEnabled) {
+ // check touch partition event was detected
+ assertEquals(1, touchPartitionEvents.size(),
+ "There should be one touch partition event when touch partitions
is enabled");
+ } else {
+ // check no touch partition events when disabled
+ assertEquals(0, touchPartitionEvents.size(),
+ "There should be no touch partition events when touch partitions
is disabled");
+ }
+
+ // Verify last commit time was updated
+ assertEquals("101",
hiveClient.getLastCommitTimeSynced(HiveTestUtil.TABLE_NAME).get(),
+ "Last commit time should be updated");
+ }
+
@ParameterizedTest
@MethodSource("syncModeAndEnablePushDown")
void testGetPartitionEvents_droppedStoragePartitionNotPresentInMetastore(
diff --git a/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieMetaSyncOperations.java b/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieMetaSyncOperations.java
index a19d091427c5..24a974536ef3 100644
--- a/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieMetaSyncOperations.java
+++ b/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieMetaSyncOperations.java
@@ -106,6 +106,18 @@ public interface HoodieMetaSyncOperations {
}
+ /**
+ * Touches partitions for a given table. Updates partition metadata (e.g.
last modified time)
+ * in the metastore for partitions that had new data written but no schema
or location change.
+ * Only invoked when {@code META_SYNC_TOUCH_PARTITIONS_ENABLED} is true.
+ *
+ * @param tableName The table name in the metastore.
+ * @param touchPartitions List of partition paths (storage format) to touch.
+ */
+ default void touchPartitionsToTable(String tableName, List<String>
touchPartitions) {
+
+ }
+
/**
* Drop partitions from the table in metastore.
*/
diff --git a/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieSyncClient.java b/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieSyncClient.java
index 04a6e4a10db1..3cf73e389175 100644
--- a/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieSyncClient.java
+++ b/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieSyncClient.java
@@ -51,6 +51,7 @@ import static
org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_BASE_PATH;
import static
org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_DATABASE_NAME;
import static
org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_PARTITION_EXTRACTOR_CLASS;
import static
org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_TABLE_NAME;
+import static
org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_TOUCH_PARTITIONS_ENABLED;
import static
org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_USE_FILE_LISTING_FROM_METADATA;
@Slf4j
@@ -242,6 +243,9 @@ public abstract class HoodieSyncClient implements
HoodieMetaSyncOperations, Auto
events.add(PartitionEvent.newPartitionAddEvent(storagePartition));
} else if
(!paths.get(storageValue).equals(fullStoragePartitionPath)) {
events.add(PartitionEvent.newPartitionUpdateEvent(storagePartition));
+ } else if (config.getBoolean(META_SYNC_TOUCH_PARTITIONS_ENABLED)) {
+ // Only produce TOUCH events when touch partitions is enabled
+
events.add(PartitionEvent.newPartitionTouchEvent(storagePartition));
}
}
}
diff --git a/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieSyncConfig.java b/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieSyncConfig.java
index 53698d492d22..c1fbcc4776da 100644
--- a/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieSyncConfig.java
+++ b/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieSyncConfig.java
@@ -174,6 +174,14 @@ public class HoodieSyncConfig extends HoodieConfig {
+ "This is useful when the partition metadata is large, and the
partition info can be "
+ "obtained from Hudi's internal metadata table. Note, " +
HoodieMetadataConfig.ENABLE + " must be set to true.");
+ public static final ConfigProperty<Boolean>
META_SYNC_TOUCH_PARTITIONS_ENABLED = ConfigProperty
+ .key("hoodie.meta.sync.touch.partitions.enabled")
+ .defaultValue(false)
+ .sinceVersion("1.2.0")
+ .markAdvanced()
+ .withDocumentation("If true, TOUCH partition events will be emitted
during meta sync. "
+ + "TOUCH events indicate partitions that exist in both storage and
metastore, no schema or location change, but the partition has received data.");
+
@Getter
@Setter
private Configuration hadoopConf;
diff --git a/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/model/PartitionEvent.java b/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/model/PartitionEvent.java
index ebdc90af4bfb..fa214f5678c8 100644
--- a/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/model/PartitionEvent.java
+++ b/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/model/PartitionEvent.java
@@ -25,7 +25,7 @@ package org.apache.hudi.sync.common.model;
public class PartitionEvent {
public enum PartitionEventType {
- ADD, UPDATE, DROP
+ ADD, UPDATE, DROP, TOUCH
}
public PartitionEventType eventType;
@@ -47,4 +47,8 @@ public class PartitionEvent {
public static PartitionEvent newPartitionDropEvent(String storagePartition) {
return new PartitionEvent(PartitionEventType.DROP, storagePartition);
}
+
+ public static PartitionEvent newPartitionTouchEvent(String storagePartition)
{
+ return new PartitionEvent(PartitionEventType.TOUCH, storagePartition);
+ }
}