danny0405 commented on code in PR #9374:
URL: https://github.com/apache/hudi/pull/9374#discussion_r1285145028
##########
hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/util/SyncUtilHelpers.java:
##########
@@ -33,36 +32,56 @@
import java.util.Map;
import java.util.Properties;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.locks.Lock;
+import java.util.concurrent.locks.ReentrantLock;
+
+import static org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_BASE_FILE_FORMAT;
+import static org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_BASE_PATH;
+import static org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_TABLE_NAME;
/**
* Helper class for syncing Hudi commit data with external metastores.
*/
public class SyncUtilHelpers {
private static final Logger LOG =
LoggerFactory.getLogger(SyncUtilHelpers.class);
+
+  // Locks for each table (base path) to avoid concurrent modification of the same underlying meta storage.
+  // A meta store such as Hive may encounter a {@code ConcurrentModificationException} for #alter_table.
+  private static final ConcurrentHashMap<String, Lock> TABLE_LOCKS = new ConcurrentHashMap<>();
+
   /**
    * Create an instance of an implementation of {@link HoodieSyncTool} that will sync all the relevant meta information
    * with an external metastore such as Hive etc. to ensure Hoodie tables can be queried or read via external systems.
    *
-   * <p>IMPORTANT: make this method class level thread safe to avoid concurrent modification of the same underneath meta storage.
-   * Meta store such as Hive may encounter {@code ConcurrentModificationException} for #alter_table.
-   *
-   * @param syncToolClassName Class name of the {@link HoodieSyncTool} implementation.
-   * @param props property map.
-   * @param hadoopConfig Hadoop confs.
-   * @param fs Filesystem used.
-   * @param targetBasePath The target base path that contains the hoodie table.
-   * @param baseFileFormat The file format used by the hoodie table (defaults to PARQUET).
+   * @param syncToolClassName Class name of the {@link HoodieSyncTool} implementation.
+   * @param props             property map.
+   * @param hadoopConfig      Hadoop confs.
+   * @param fs                Filesystem used.
+   * @param targetBasePath    The target base path that contains the hoodie table.
+   * @param baseFileFormat    The file format used by the hoodie table (defaults to PARQUET).
    */
-  public static synchronized void runHoodieMetaSync(String syncToolClassName,
+  public static void runHoodieMetaSync(String syncToolClassName,
                                        TypedProperties props,
                                        Configuration hadoopConfig,
                                        FileSystem fs,
                                        String targetBasePath,
                                        String baseFileFormat) {
-    try (HoodieSyncTool syncTool = instantiateMetaSyncTool(syncToolClassName, props, hadoopConfig, fs, targetBasePath, baseFileFormat)) {
-      syncTool.syncHoodieTable();
-    } catch (Throwable e) {
-      throw new HoodieMetaSyncException("Could not sync using the meta sync class " + syncToolClassName, e);
+    if (targetBasePath == null) {
+      throw new IllegalArgumentException("Target base path must not be null");
+    }
+
+    // Get or create a lock for the specific table
+    Lock tableLock = TABLE_LOCKS.computeIfAbsent(targetBasePath, k -> new ReentrantLock());
Review Comment:
   Yeah, we can always extend it with more sophisticated requests.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]