ArafatKhan2198 commented on code in PR #8002:
URL: https://github.com/apache/ozone/pull/8002#discussion_r1980842410
##########
hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/tasks/ContainerKeyMapperHelper.java:
##########
@@ -0,0 +1,472 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.ozone.recon.tasks;
+
+import java.io.IOException;
+import java.time.Duration;
+import java.time.Instant;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import org.apache.hadoop.hdds.utils.db.RDBBatchOperation;
+import org.apache.hadoop.hdds.utils.db.Table;
+import org.apache.hadoop.hdds.utils.db.TableIterator;
+import org.apache.hadoop.ozone.om.OMMetadataManager;
+import org.apache.hadoop.ozone.om.helpers.BucketLayout;
+import org.apache.hadoop.ozone.om.helpers.OmKeyInfo;
+import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo;
+import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfoGroup;
+import org.apache.hadoop.ozone.recon.ReconConstants;
+import org.apache.hadoop.ozone.recon.api.types.ContainerKeyPrefix;
+import org.apache.hadoop.ozone.recon.api.types.KeyPrefixContainer;
+import org.apache.hadoop.ozone.recon.spi.ReconContainerMetadataManager;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Helper class that encapsulates the common logic for ContainerKeyMapperTaskFSO and ContainerKeyMapperTaskOBS.
+ */
+public abstract class ContainerKeyMapperHelper {
+
+  private static final Logger LOG = LoggerFactory.getLogger(ContainerKeyMapperHelper.class);
+
+  // Static lock to guard table truncation.
+  private static final Object TRUNCATE_LOCK = new Object();
+
+  /**
+   * Ensures that the container key tables are truncated only once before reprocessing.
+   * Uses an AtomicBoolean to track whether truncation has already been performed.
+   *
+   * @param reconContainerMetadataManager The metadata manager instance responsible for DB operations.
+   */
+  public static void truncateTablesIfNeeded(ReconContainerMetadataManager reconContainerMetadataManager) {
+    synchronized (TRUNCATE_LOCK) {
+      if (ReconConstants.CONTAINER_KEY_TABLES_TRUNCATED.compareAndSet(false, true)) {
+        try {
+          // Perform table truncation
+          reconContainerMetadataManager.reinitWithNewContainerDataFromOm(new HashMap<>());
+          LOG.info("Successfully truncated container key tables.");
+        } catch (Exception e) {
+          // Reset the flag so truncation can be retried
+          ReconConstants.CONTAINER_KEY_TABLES_TRUNCATED.set(false);
+          LOG.error("Error while truncating container key tables. Resetting flag.", e);
+          throw new RuntimeException("Table truncation failed", e);
+        }
+      } else {
+        LOG.info("Container key tables already truncated by another task.");
+      }
+    }
+  }
+
+  public static boolean reprocess(OMMetadataManager omMetadataManager,
+                                  ReconContainerMetadataManager reconContainerMetadataManager,
+                                  BucketLayout bucketLayout,
+                                  String taskName,
+                                  long containerKeyFlushToDBMaxThreshold) {
+    long omKeyCount = 0;
+    Map<ContainerKeyPrefix, Integer> containerKeyMap = new HashMap<>();
+    Map<Long, Long> containerKeyCountMap = new HashMap<>();
+
+    try {
+      LOG.info("Starting a 'reprocess' run for {}.", taskName);
+      Instant start = Instant.now();
+
+      // Ensure the tables are truncated only once
+      truncateTablesIfNeeded(reconContainerMetadataManager);
+
+      // Get the appropriate table based on BucketLayout
+      Table<String, OmKeyInfo> omKeyInfoTable = omMetadataManager.getKeyTable(bucketLayout);
+
+      // Iterate through the table and process keys
+      try (TableIterator<String, ? extends Table.KeyValue<String, OmKeyInfo>> keyIter = omKeyInfoTable.iterator()) {
+        while (keyIter.hasNext()) {
+          Table.KeyValue<String, OmKeyInfo> kv = keyIter.next();
+          handleKeyReprocess(kv.getKey(), kv.getValue(), containerKeyMap, containerKeyCountMap,
+              reconContainerMetadataManager);
+          omKeyCount++;
+
+          // Check and flush data if it reaches the batch threshold
+          if (!checkAndCallFlushToDB(containerKeyMap, containerKeyFlushToDBMaxThreshold,
+              reconContainerMetadataManager)) {
+            LOG.error("Failed to flush container key data for {}", taskName);
+            return false;
+          }
+        }
+      }
+
+      // Final flush and commit
+      if (!flushAndCommitContainerKeyInfoToDB(containerKeyMap, containerKeyCountMap, reconContainerMetadataManager)) {
+        LOG.error("Failed to flush Container Key data to DB for {}", taskName);
+        return false;
+      }
+
+      Instant end = Instant.now();
+      long durationMillis = Duration.between(start, end).toMillis();
+      double durationSeconds = (double) durationMillis / 1000.0;
+      LOG.info("Completed 'reprocess' for {}. Processed {} keys in {} ms ({} seconds).",
+          taskName, omKeyCount, durationMillis, durationSeconds);
+
+    } catch (IOException ioEx) {
+      LOG.error("Error populating Container Key data for {} in Recon DB.", taskName, ioEx);
+      return false;
+    }
+    return true;
+  }
+
+  private static boolean checkAndCallFlushToDB(Map<ContainerKeyPrefix, Integer> containerKeyMap,
+                                               long containerKeyFlushToDBMaxThreshold,
+                                               ReconContainerMetadataManager reconContainerMetadataManager) {
+    if (containerKeyMap.size() >= containerKeyFlushToDBMaxThreshold) {
+      return flushAndCommitContainerKeyInfoToDB(containerKeyMap, Collections.emptyMap(), reconContainerMetadataManager);
+    }
+    return true;
+  }
+
+  public static boolean process(OMUpdateEventBatch events,
+                                String tableName,
+                                ReconContainerMetadataManager reconContainerMetadataManager,
+                                String taskName) {
+    Iterator<OMDBUpdateEvent> eventIterator = events.getIterator();
+    int eventCount = 0;
+
+    // In-memory maps for fast lookup and batch write
+    // (HDDS-8580) containerKeyMap is allowed to be used
+    // in "process" without batching since the maximum number of keys
+    // is bounded by delta limit configurations
+
+    // (container, key) -> count
+    Map<ContainerKeyPrefix, Integer> containerKeyMap = new HashMap<>();
+    // containerId -> key count
+    Map<Long, Long> containerKeyCountMap = new HashMap<>();
+    // List of the deleted (container, key) pairs
+    List<ContainerKeyPrefix> deletedKeyCountList = new ArrayList<>();
+    long startTime = System.currentTimeMillis();
+
+    while (eventIterator.hasNext()) {
+      OMDBUpdateEvent<String, OmKeyInfo> omdbUpdateEvent = eventIterator.next();
+      // Filter events inside the process method to avoid duplicate handling
+      if (!tableName.equals(omdbUpdateEvent.getTable())) {
+        continue;
+      }
+      String updatedKey = omdbUpdateEvent.getKey();
+      OmKeyInfo updatedKeyValue = omdbUpdateEvent.getValue();
+      try {
+        switch (omdbUpdateEvent.getAction()) {
+        case PUT:
+          handlePutOMKeyEvent(updatedKey, updatedKeyValue, containerKeyMap,
+              containerKeyCountMap, deletedKeyCountList, reconContainerMetadataManager);
+          break;
+
+        case DELETE:
+          handleDeleteOMKeyEvent(updatedKey, containerKeyMap,
+              containerKeyCountMap, deletedKeyCountList, reconContainerMetadataManager);
+          break;
+
+        case UPDATE:
+          if (omdbUpdateEvent.getOldValue() != null) {
+            handleDeleteOMKeyEvent(
+                omdbUpdateEvent.getOldValue().getKeyName(), containerKeyMap,
+                containerKeyCountMap, deletedKeyCountList, reconContainerMetadataManager);
+          } else {
+            LOG.warn("Update event does not have the old Key Info for {}.", updatedKey);
+          }
+          handlePutOMKeyEvent(updatedKey, updatedKeyValue, containerKeyMap,
+              containerKeyCountMap, deletedKeyCountList, reconContainerMetadataManager);
+          break;
+
+        default:
+          LOG.info("Skipping DB update event: {}", omdbUpdateEvent.getAction());
+        }
+        eventCount++;
+      } catch (IOException e) {
+        LOG.error("Unexpected exception while updating key data: {}", updatedKey, e);
+        return false;
+      }
+    }
+    try {
+      writeToTheDB(containerKeyMap, containerKeyCountMap, deletedKeyCountList, reconContainerMetadataManager);
+    } catch (IOException e) {
+      LOG.error("Unable to write Container Key Prefix data in Recon DB.", e);
+      return false;
+    }
+    LOG.info("{} successfully processed {} OM DB update event(s) in {} milliseconds.",

Review Comment:
   Will change to debug, and yes, they are captured in the metrics: https://github.com/apache/ozone/pull/7835/files#:~:text=Resolve%20conversation-,metrics.updateTaskReprocessLatency(Time.monotonicNow()%20%2D%20startTime)%3B,%7D,-%7D
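Since the quoted hunk only shows the shared helper, here is a minimal caller sketch of how one of the two concrete tasks named in the class javadoc might delegate to it. The class shape, the constructor, and the "keyTable" table name are assumptions for illustration, not part of this diff:

    import org.apache.hadoop.ozone.recon.spi.ReconContainerMetadataManager;

    // Hypothetical OBS-flavored task delegating its delta processing to the
    // shared helper; only events from the OBS key table are relevant here.
    public class ContainerKeyMapperTaskOBS {

      private final ReconContainerMetadataManager reconContainerMetadataManager;

      public ContainerKeyMapperTaskOBS(ReconContainerMetadataManager manager) {
        this.reconContainerMetadataManager = manager;
      }

      public boolean process(OMUpdateEventBatch events) {
        // The helper filters events by table name, so passing the task's own
        // table keeps FSO and OBS deltas from being double-counted.
        return ContainerKeyMapperHelper.process(
            events, "keyTable", reconContainerMetadataManager, "ContainerKeyMapperTaskOBS");
      }
    }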
##########
hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/tasks/ContainerKeyMapperHelper.java:
##########
@@ -0,0 +1,472 @@ (same hunk as above; this comment is anchored at the 'reprocess' completion log)
+      Instant end = Instant.now();
+      long durationMillis = Duration.between(start, end).toMillis();
+      double durationSeconds = (double) durationMillis / 1000.0;
+      LOG.info("Completed 'reprocess' for {}. Processed {} keys in {} ms ({} seconds).",

Review Comment:
   Will change to debug, and yes, they are captured in the metrics: https://github.com/apache/ozone/pull/7835/files#:~:text=Resolve%20conversation-,metrics.updateTaskReprocessLatency(Time.monotonicNow()%20%2D%20startTime)%3B,%7D,-%7D
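As a companion to the reply above, a minimal sketch of the metrics-based timing it points to; the metrics field and updateTaskReprocessLatency(...) come from the text fragment embedded in the linked URL and are assumptions here rather than part of this diff:

    import org.apache.hadoop.ozone.om.OMMetadataManager;
    import org.apache.hadoop.ozone.om.helpers.BucketLayout;
    import org.apache.hadoop.util.Time;

    // Inside a concrete task: time the shared reprocess() and report the
    // latency through metrics, so the completion message can drop to DEBUG
    // without losing the timing information.
    public boolean reprocessWithTiming(OMMetadataManager omMetadataManager) {
      long startTime = Time.monotonicNow();
      boolean success = ContainerKeyMapperHelper.reprocess(
          omMetadataManager, reconContainerMetadataManager,
          BucketLayout.FILE_SYSTEM_OPTIMIZED, "ContainerKeyMapperTaskFSO",
          containerKeyFlushToDBMaxThreshold);
      metrics.updateTaskReprocessLatency(Time.monotonicNow() - startTime);
      return success;
    }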
##########
hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/tasks/ContainerKeyMapperHelper.java:
##########
@@ -0,0 +1,472 @@ (same hunk as above; this comment is anchored at the default case of the event switch)
+        default:
+          LOG.info("Skipping DB update event: {}", omdbUpdateEvent.getAction());

Review Comment:
   Done!

--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]
