Re: [PR] [HUDI-9177] Add Indexer abstraction and refactor metadata table initialization logic [hudi]

via GitHub Fri, 02 May 2025 17:09:11 -0700


nsivabalan commented on code in PR #12983:
URL: https://github.com/apache/hudi/pull/12983#discussion_r2072243884



##########
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java:
##########
@@ -381,137 +380,118 @@ private boolean initializeFromFilesystem(String 
initializationTime, List<Metadat
       }
     }
 
-    // Already initialized partitions can be ignored
-    partitionsToInit.removeIf(metadataPartition -> 
dataMetaClient.getTableConfig().isMetadataPartitionAvailable((metadataPartition)));
+    indexerMapForPartitionsToInit.keySet().removeIf(
+        metadataPartition -> 
dataMetaClient.getTableConfig().isMetadataPartitionAvailable(metadataPartition));
 
     // Get a complete list of files and partitions from the file system or 
from already initialized FILES partition of MDT
     List<DirectoryInfo> partitionInfoList;
     if (filesPartitionAvailable) {
-      partitionInfoList = listAllPartitionsFromMDT(initializationTime, 
pendingDataInstants);
+      partitionInfoList = listAllPartitionsFromMDT(dataTableInstantTime, 
pendingDataInstants);
     } else {
       // if auto initialization is enabled, then we need to list all 
partitions from the file system
       if (dataWriteConfig.getMetadataConfig().shouldAutoInitialize()) {
-        partitionInfoList = 
listAllPartitionsFromFilesystem(initializationTime, pendingDataInstants);
+        partitionInfoList = 
listAllPartitionsFromFilesystem(dataTableInstantTime, pendingDataInstants);
       } else {
         // if auto initialization is disabled, we can return an empty list
         partitionInfoList = Collections.emptyList();
       }
     }
-    Map<String, Map<String, Long>> partitionToFilesMap = 
partitionInfoList.stream()
+    Map<String, Map<String, Long>> partitionToAllFilesMap = 
partitionInfoList.stream()
         .map(p -> {
           String partitionName = 
HoodieTableMetadataUtil.getPartitionIdentifierForFilesPartition(p.getRelativePath());
           return Pair.of(partitionName, p.getFileNameToSizeMap());
         })
         .collect(Collectors.toMap(Pair::getKey, Pair::getValue));
 
-    // validate that each index is eligible to be initialized
-    Iterator<MetadataPartitionType> iterator = partitionsToInit.iterator();
-    while (iterator.hasNext()) {
-      MetadataPartitionType partitionType = iterator.next();
-      if (partitionType == PARTITION_STATS && 
!dataMetaClient.getTableConfig().isTablePartitioned()) {
-        LOG.warn("Partition stats index cannot be enabled for a 
non-partitioned table. Removing from initialization list. Please disable {}",
-            HoodieMetadataConfig.ENABLE_METADATA_INDEX_PARTITION_STATS.key());
-        iterator.remove();
-        this.enabledPartitionTypes.remove(partitionType);
-      }
+    // TODO(yihua): check this wrt files partition, i.e., after files 
partition is ready in
+    //  MDT, could it be leveraged afterward?
+    Lazy<List<Pair<String, FileSlice>>> lazyLatestMergedPartitionFileSliceList 
= getLazyPartitionFileSliceList();
+    if (!filesPartitionAvailable) {
+      // FILES partition should always be initialized first if enabled
+      initializeMetadataPartition(
+          FILES, indexerMapForPartitionsToInit.get(FILES), 
dataTableInstantTime,
+          partitionToAllFilesMap, lazyLatestMergedPartitionFileSliceList);
     }
+    for (Map.Entry<MetadataPartitionType, Indexer> entry :
+        indexerMapForPartitionsToInit.entrySet().stream()
+            .filter(e -> e.getKey() != FILES).collect(Collectors.toList())) {
+      initializeMetadataPartition(
+          entry.getKey(), entry.getValue(), dataTableInstantTime,
+          partitionToAllFilesMap, lazyLatestMergedPartitionFileSliceList);
+    }
+    return true;
+  }
 
-    for (MetadataPartitionType partitionType : partitionsToInit) {
-      // Find the commit timestamp to use for this partition. Each 
initialization should use its own unique commit time.
-      String instantTimeForPartition = 
generateUniqueInstantTime(initializationTime);
-      String partitionTypeName = partitionType.name();
-      LOG.info("Initializing MDT partition {} at instant {}", 
partitionTypeName, instantTimeForPartition);
-      String partitionName;
-      Pair<Integer, HoodieData<HoodieRecord>> fileGroupCountAndRecordsPair;
-      List<String> columnsToIndex = new ArrayList<>();
-      try {
-        switch (partitionType) {
-          case FILES:
-            fileGroupCountAndRecordsPair = 
initializeFilesPartition(partitionInfoList);
-            partitionName = FILES.getPartitionPath();
-            break;
-          case BLOOM_FILTERS:
-            fileGroupCountAndRecordsPair = 
initializeBloomFiltersPartition(initializationTime, partitionToFilesMap);
-            partitionName = BLOOM_FILTERS.getPartitionPath();
-            break;
-          case COLUMN_STATS:
-            Pair<List<String>, Pair<Integer, HoodieData<HoodieRecord>>> 
colStatsColumnsAndRecord = initializeColumnStatsPartition(partitionToFilesMap);
-            columnsToIndex = colStatsColumnsAndRecord.getKey();
-            fileGroupCountAndRecordsPair = colStatsColumnsAndRecord.getValue();
-            partitionName = COLUMN_STATS.getPartitionPath();
-            break;
-          case RECORD_INDEX:
-            fileGroupCountAndRecordsPair = initializeRecordIndexPartition();
-            partitionName = RECORD_INDEX.getPartitionPath();
-            break;
-          case EXPRESSION_INDEX:
-            Set<String> expressionIndexPartitionsToInit = 
getExpressionIndexPartitionsToInit(partitionType, 
dataWriteConfig.getMetadataConfig(), dataMetaClient);
-            if (expressionIndexPartitionsToInit.size() != 1) {
-              if (expressionIndexPartitionsToInit.size() > 1) {
-                LOG.warn("Skipping expression index initialization as only one 
expression index bootstrap at a time is supported for now. Provided: {}", 
expressionIndexPartitionsToInit);
-              }
-              continue;
-            }
-            partitionName = expressionIndexPartitionsToInit.iterator().next();
-            fileGroupCountAndRecordsPair = 
initializeExpressionIndexPartition(partitionName, instantTimeForPartition);
-            break;
-          case PARTITION_STATS:
-            // For PARTITION_STATS, COLUMN_STATS should also be enabled
-            if (!dataWriteConfig.isMetadataColumnStatsIndexEnabled()) {
-              LOG.warn("Skipping partition stats initialization as column 
stats index is not enabled. Please enable {}",
-                  
HoodieMetadataConfig.ENABLE_METADATA_INDEX_COLUMN_STATS.key());
-              continue;
-            }
-            fileGroupCountAndRecordsPair = initializePartitionStatsIndex();
-            partitionName = PARTITION_STATS.getPartitionPath();
-            break;
-          case SECONDARY_INDEX:
-            Set<String> secondaryIndexPartitionsToInit = 
getSecondaryIndexPartitionsToInit(partitionType, 
dataWriteConfig.getMetadataConfig(), dataMetaClient);
-            if (secondaryIndexPartitionsToInit.size() != 1) {
-              if (secondaryIndexPartitionsToInit.size() > 1) {
-                LOG.warn("Skipping secondary index initialization as only one 
secondary index bootstrap at a time is supported for now. Provided: {}", 
secondaryIndexPartitionsToInit);
-              }
-              continue;
-            }
-            partitionName = secondaryIndexPartitionsToInit.iterator().next();
-            fileGroupCountAndRecordsPair = 
initializeSecondaryIndexPartition(partitionName);
-            break;
-          default:
-            throw new HoodieMetadataException(String.format("Unsupported MDT 
partition type: %s", partitionType));
-        }
-      } catch (Exception e) {
-        String metricKey = partitionType.getPartitionPath() + "_" + 
HoodieMetadataMetrics.BOOTSTRAP_ERR_STR;
-        metrics.ifPresent(m -> m.setMetric(metricKey, 1));
-        String errMsg = String.format("Bootstrap on %s partition failed for 
%s",
-            partitionType.getPartitionPath(), 
metadataMetaClient.getBasePath());
-        LOG.error(errMsg, e);
-        throw new HoodieMetadataException(errMsg, e);
+  private Lazy<List<Pair<String, FileSlice>>> getLazyPartitionFileSliceList() {
+    return Lazy.lazily(() -> {
+      String latestInstant = 
dataMetaClient.getActiveTimeline().filterCompletedAndCompactionInstants().lastInstant()
+          .map(HoodieInstant::requestedTime).orElse(SOLO_COMMIT_TIMESTAMP);
+      // now we need to rely on outside caller to pass fsView and close the 
fsView.
+      // also see if we can reuse fsView across indexes.
+      // Collect the list of latest file slices present in each partition
+      try (HoodieTableFileSystemView fsView = getMetadataView()) {
+        List<String> partitions = metadata.getAllPartitionPaths();
+        fsView.loadAllPartitions();
+        List<Pair<String, FileSlice>> partitionFileSlicePairs = new 
ArrayList<>();
+        partitions.forEach(partition -> 
fsView.getLatestMergedFileSlicesBeforeOrOn(partition, latestInstant)
+            .forEach(fs -> partitionFileSlicePairs.add(Pair.of(partition, 
fs))));
+        return partitionFileSlicePairs;
+      } catch (IOException e) {
+        throw new UncheckedIOException(e);
       }
+    });
+  }
 
-      if (LOG.isInfoEnabled()) {
-        LOG.info("Initializing {} index with {} mappings", partitionTypeName, 
fileGroupCountAndRecordsPair.getKey());
-      }
-      HoodieTimer partitionInitTimer = HoodieTimer.start();
-
-      // Generate the file groups
-      final int fileGroupCount = fileGroupCountAndRecordsPair.getKey();
-      ValidationUtils.checkArgument(fileGroupCount > 0, "FileGroup count for 
MDT partition " + partitionTypeName + " should be > 0");
-      initializeFileGroups(dataMetaClient, partitionType, 
instantTimeForPartition, fileGroupCount, partitionName);
-
-      // Perform the commit using bulkCommit
-      HoodieData<HoodieRecord> records = 
fileGroupCountAndRecordsPair.getValue();
-      bulkCommit(instantTimeForPartition, partitionName, records, 
fileGroupCount);
-      if (partitionType == COLUMN_STATS) {
-        // initialize Col Stats index definition
-        updateColumnsToIndexWithColStats(columnsToIndex);
-      }
-      
dataMetaClient.getTableConfig().setMetadataPartitionState(dataMetaClient, 
partitionName, true);
-      // initialize the metadata reader again so the MDT partition can be read 
after initialization
-      initMetadataReader();
-      long totalInitTime = partitionInitTimer.endTimer();
-      LOG.info("Initializing {} index in metadata table took {} in ms", 
partitionTypeName, totalInitTime);
+  private void initializeMetadataPartition(
+      MetadataPartitionType partitionType,
+      Indexer indexer,
+      String dataTableInstantTime,
+      Map<String, Map<String, Long>> partitionToAllFilesMap,
+      Lazy<List<Pair<String, FileSlice>>> 
lazyLatestMergedPartitionFileSliceList)
+      throws IOException {
+    // Find the commit timestamp to use for this partition. Each 
initialization should use its own unique commit time.
+    String instantTimeForPartition = 
generateUniqueInstantTime(dataTableInstantTime);
+    String partitionTypeName = partitionType.name();
+    LOG.info("Initializing MDT partition {} at instant {}", partitionTypeName, 
instantTimeForPartition);
+    List<Indexer.InitialIndexPartitionData> initialIndexPartitionDataList;
+    try {
+      initialIndexPartitionDataList = indexer.initialize(
+          dataTableInstantTime, partitionToAllFilesMap, 
lazyLatestMergedPartitionFileSliceList);
+    } catch (Exception e) {
+      String metricKey = partitionType.getPartitionPath() + "_" + 
HoodieMetadataMetrics.BOOTSTRAP_ERR_STR;
+      metrics.ifPresent(m -> m.setMetric(metricKey, 1));
+      String errMsg = String.format("Bootstrap on %s partition failed for %s",
+          partitionType.getPartitionPath(), metadataMetaClient.getBasePath());
+      LOG.error(errMsg, e);
+      throw new HoodieMetadataException(errMsg, e);
     }
-    return true;
+
+    if (initialIndexPartitionDataList.isEmpty()) {
+      LOG.info("Skip building {} index in metadata table", partitionTypeName);
+      return;
+    }
+
+    ValidationUtils.checkArgument(initialIndexPartitionDataList.size() == 1,

Review Comment:
   Why do we need to enforce it here?
   I already see that we do it within SecondaryIndexer, for instance:
   ```
   if (secondaryIndexPartitionsToInit.get().size() != 1) {
   if (secondaryIndexPartitionsToInit.get().size() > 1) {
           LOG.warn("Skipping secondary index initialization as only one 
secondary index "
                   + "bootstrap at a time is supported for now. Provided: {}",
               secondaryIndexPartitionsToInit.get());
         }
         return Collections.emptyList();
   ```
   



##########
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/index/Indexer.java:
##########
@@ -0,0 +1,137 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hudi.metadata.index;
+
+import org.apache.hudi.common.data.HoodieData;
+import org.apache.hudi.common.model.FileSlice;
+import org.apache.hudi.common.model.HoodieRecord;
+import org.apache.hudi.common.util.ValidationUtils;
+import org.apache.hudi.common.util.collection.Pair;
+import org.apache.hudi.common.util.collection.Tuple3;
+import org.apache.hudi.util.Lazy;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+/**
+ * Interface for initializing and updating a type of metadata or index
+ * in the metadata table
+ * <p>
+ * When a new type of index is added to MetadataPartitionType, an
+ * implementation of the {@link Indexer} interface is required, and it
+ * must be added to {@link IndexerFactory}.
+ */
+public interface Indexer {
+  /**
+   * Generates records for initializing the index.
+   *
+   * @param dataTableInstantTime                   instant time of the data 
table that the metadata table
+   *                                               is initialized on
+   * @param partitionIdToAllFilesMap               map of partition to files
+   * @param lazyLatestMergedPartitionFileSliceList lazily-evaluated list of 
file slices for the indexer
+   *                                               that needs it
+   * @return a list of {@link InitialIndexPartitionData}, which each data item
+   * representing the records to initialize a particular partition (note that
+   * one index type can correspond to one or multiple partitions in the 
metadata
+   * table). An empty list returned indicates that the metadata partition does
+   * not need to be initialized.
+   * @throws IOException upon IO error
+   */
+  List<InitialIndexPartitionData> initialize(
+      String dataTableInstantTime,
+      Map<String, Map<String, Long>> partitionIdToAllFilesMap,
+      Lazy<List<Pair<String, FileSlice>>> 
lazyLatestMergedPartitionFileSliceList) throws IOException;
+
+  /**
+   * Updates the table config of the data table to reflect the state of the 
index
+   */
+  default void updateTableConfig() {
+    // No index-specific table config update by default

Review Comment:
   Can you remind me what we narrowed down here?
   The MDT writer will return a list of updated index definitions, and the caller will 
take care of updating the TableConfig?
   And so we don't really need this method?
   



##########
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/index/Indexer.java:
##########
@@ -0,0 +1,137 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hudi.metadata.index;
+
+import org.apache.hudi.common.data.HoodieData;
+import org.apache.hudi.common.model.FileSlice;
+import org.apache.hudi.common.model.HoodieRecord;
+import org.apache.hudi.common.util.ValidationUtils;
+import org.apache.hudi.common.util.collection.Pair;
+import org.apache.hudi.common.util.collection.Tuple3;
+import org.apache.hudi.util.Lazy;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+/**
+ * Interface for initializing and updating a type of metadata or index
+ * in the metadata table
+ * <p>
+ * When a new type of index is added to MetadataPartitionType, an
+ * implementation of the {@link Indexer} interface is required, and it
+ * must be added to {@link IndexerFactory}.
+ */
+public interface Indexer {
+  /**
+   * Generates records for initializing the index.
+   *
+   * @param dataTableInstantTime                   instant time of the data 
table that the metadata table
+   *                                               is initialized on
+   * @param partitionIdToAllFilesMap               map of partition to files
+   * @param lazyLatestMergedPartitionFileSliceList lazily-evaluated list of 
file slices for the indexer
+   *                                               that needs it
+   * @return a list of {@link InitialIndexPartitionData}, which each data item
+   * representing the records to initialize a particular partition (note that
+   * one index type can correspond to one or multiple partitions in the 
metadata
+   * table). An empty list returned indicates that the metadata partition does
+   * not need to be initialized.
+   * @throws IOException upon IO error
+   */
+  List<InitialIndexPartitionData> initialize(
+      String dataTableInstantTime,
+      Map<String, Map<String, Long>> partitionIdToAllFilesMap,
+      Lazy<List<Pair<String, FileSlice>>> 
lazyLatestMergedPartitionFileSliceList) throws IOException;
+
+  /**
+   * Updates the table config of the data table to reflect the state of the 
index
+   */
+  default void updateTableConfig() {
+    // No index-specific table config update by default
+  }
+
+  static List<Tuple3<String, String, Boolean>> fetchPartitionFileInfoTriplets(
+      Map<String, Map<String, Long>> partitionToAppendedFiles) {
+    // Total number of files which are added or deleted
+    final int totalFiles = 
partitionToAppendedFiles.values().stream().mapToInt(Map::size).sum();
+    final List<Tuple3<String, String, Boolean>> partitionFileFlagTupleList = 
new ArrayList<>(totalFiles);
+    partitionToAppendedFiles.entrySet().stream()
+        .flatMap(
+            entry -> entry.getValue().keySet().stream().map(addedFile -> 
Tuple3.of(entry.getKey(), addedFile, false)))
+        .collect(Collectors.toCollection(() -> partitionFileFlagTupleList));
+    return partitionFileFlagTupleList;
+  }
+
+  class IndexPartitionData {
+    private final String partitionName;
+    private final HoodieData<HoodieRecord> records;
+
+    private IndexPartitionData(String partitionName, HoodieData<HoodieRecord> 
records) {
+      this.partitionName = partitionName;
+      this.records = records;
+    }
+
+    public static IndexPartitionData of(String partitionName, 
HoodieData<HoodieRecord> records) {
+      return new IndexPartitionData(partitionName, records);
+    }
+
+    public String partitionName() {
+      return partitionName;
+    }
+
+    public HoodieData<HoodieRecord> records() {
+      return records;
+    }
+  }
+
+  class InitialIndexPartitionData {
+    private final int numFileGroup;
+    private final IndexPartitionData partitionedRecords;
+
+    private InitialIndexPartitionData(int numFileGroup,
+                                      String partitionName,
+                                      HoodieData<HoodieRecord> records) {
+      this.numFileGroup = numFileGroup;
+      this.partitionedRecords = IndexPartitionData.of(partitionName, records);
+    }
+
+    public static InitialIndexPartitionData of(int numFileGroup,
+                                               String partitionName,
+                                               HoodieData<HoodieRecord> 
records) {
+      ValidationUtils.checkArgument(numFileGroup > 0,
+          "The number of file groups of the index data should be positive");
+      return new InitialIndexPartitionData(numFileGroup, partitionName, 
records);
+    }
+
+    public int numFileGroup() {

Review Comment:
   Are we changing our usual way of naming getters?
   I was expecting `getNumFileGroups()`.
   Similarly, `getPartitionName()` below.
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Re: [PR] [HUDI-9177] Add Indexer abstraction and refactor metadata table initialization logic [hudi]

Reply via email to