jonvex commented on code in PR #11171:
URL: https://github.com/apache/hudi/pull/11171#discussion_r1596186554


##########
hudi-common/src/main/java/org/apache/hudi/common/bootstrap/index/HFileBootstrapIndex.java:
##########
@@ -200,582 +166,4 @@ public void dropIndex() {
   public boolean isPresent() {
     return isPresent;
   }
-
-  /**
-   * HFile Based Index Reader.
-   */
-  public static class HFileBootstrapIndexReader extends 
BootstrapIndex.IndexReader {
-
-    // Base Path of external files.
-    private final String bootstrapBasePath;
-    // Well Known Paths for indices
-    private final String indexByPartitionPath;
-    private final String indexByFileIdPath;
-
-    // Index Readers
-    private transient HFileReader indexByPartitionReader;
-    private transient HFileReader indexByFileIdReader;
-
-    // Bootstrap Index Info
-    private transient HoodieBootstrapIndexInfo bootstrapIndexInfo;
-
-    public HFileBootstrapIndexReader(HoodieTableMetaClient metaClient) {
-      super(metaClient);
-      StoragePath indexByPartitionPath = partitionIndexPath(metaClient);
-      StoragePath indexByFilePath = fileIdIndexPath(metaClient);
-      this.indexByPartitionPath = indexByPartitionPath.toString();
-      this.indexByFileIdPath = indexByFilePath.toString();
-      initIndexInfo();
-      this.bootstrapBasePath = bootstrapIndexInfo.getBootstrapBasePath();
-      LOG.info("Loaded HFileBasedBootstrapIndex with source base path :" + 
bootstrapBasePath);
-    }
-
-    /**
-     * Helper method to create native HFile Reader.
-     *
-     * @param hFilePath file path.
-     * @param storage   {@link HoodieStorage} instance.
-     */
-    private static HFileReader createReader(String hFilePath, HoodieStorage 
storage) throws IOException {
-      LOG.info("Opening HFile for reading :" + hFilePath);
-      StoragePath path = new StoragePath(hFilePath);
-      long fileSize = storage.getPathInfo(path).getLength();
-      SeekableDataInputStream stream = storage.openSeekable(path);
-      return new HFileReaderImpl(stream, fileSize);
-    }
-
-    private synchronized void initIndexInfo() {
-      if (bootstrapIndexInfo == null) {
-        try {
-          bootstrapIndexInfo = fetchBootstrapIndexInfo();
-        } catch (IOException ioe) {
-          throw new HoodieException(ioe.getMessage(), ioe);
-        }
-      }
-    }
-
-    private HoodieBootstrapIndexInfo fetchBootstrapIndexInfo() throws 
IOException {
-      return TimelineMetadataUtils.deserializeAvroMetadata(
-          partitionIndexReader().getMetaInfo(new 
UTF8StringKey(INDEX_INFO_KEY_STRING)).get(),
-          HoodieBootstrapIndexInfo.class);
-    }
-
-    private synchronized HFileReader partitionIndexReader() throws IOException 
{
-      if (indexByPartitionReader == null) {
-        LOG.info("Opening partition index :" + indexByPartitionPath);
-        this.indexByPartitionReader = createReader(indexByPartitionPath, 
metaClient.getStorage());
-      }
-      return indexByPartitionReader;
-    }
-
-    private synchronized HFileReader fileIdIndexReader() throws IOException {
-      if (indexByFileIdReader == null) {
-        LOG.info("Opening fileId index :" + indexByFileIdPath);
-        this.indexByFileIdReader = createReader(indexByFileIdPath, 
metaClient.getStorage());
-      }
-      return indexByFileIdReader;
-    }
-
-    @Override
-    public List<String> getIndexedPartitionPaths() {
-      try {
-        return getAllKeys(partitionIndexReader(), 
HFileBootstrapIndex::getPartitionFromKey);
-      } catch (IOException e) {
-        throw new HoodieIOException("Unable to read indexed partition paths.", 
e);
-      }
-    }
-
-    @Override
-    public List<HoodieFileGroupId> getIndexedFileGroupIds() {
-      try {
-        return getAllKeys(fileIdIndexReader(), 
HFileBootstrapIndex::getFileGroupFromKey);
-      } catch (IOException e) {
-        throw new HoodieIOException("Unable to read indexed file group IDs.", 
e);
-      }
-    }
-
-    private <T> List<T> getAllKeys(HFileReader reader, Function<String, T> 
converter) {
-      List<T> keys = new ArrayList<>();
-      try {
-        boolean available = reader.seekTo();
-        while (available) {
-          
keys.add(converter.apply(reader.getKeyValue().get().getKey().getContentInString()));
-          available = reader.next();
-        }
-      } catch (IOException ioe) {
-        throw new HoodieIOException(ioe.getMessage(), ioe);
-      }
-
-      return keys;
-    }
-
-    @Override
-    public List<BootstrapFileMapping> getSourceFileMappingForPartition(String 
partition) {
-      try {
-        HFileReader reader = partitionIndexReader();
-        Key lookupKey = new UTF8StringKey(getPartitionKey(partition));
-        reader.seekTo();
-        if (reader.seekTo(lookupKey) == HFileReader.SEEK_TO_FOUND) {
-          org.apache.hudi.io.hfile.KeyValue keyValue = 
reader.getKeyValue().get();
-          byte[] valBytes = IOUtils.copy(
-              keyValue.getBytes(), keyValue.getValueOffset(), 
keyValue.getValueLength());
-          HoodieBootstrapPartitionMetadata metadata =
-              TimelineMetadataUtils.deserializeAvroMetadata(valBytes, 
HoodieBootstrapPartitionMetadata.class);
-          return metadata.getFileIdToBootstrapFile().entrySet().stream()
-              .map(e -> new BootstrapFileMapping(bootstrapBasePath, 
metadata.getBootstrapPartitionPath(),
-                  partition, e.getValue(), 
e.getKey())).collect(Collectors.toList());
-        } else {
-          LOG.warn("No value found for partition key (" + partition + ")");
-          return new ArrayList<>();
-        }
-      } catch (IOException ioe) {
-        throw new HoodieIOException(ioe.getMessage(), ioe);
-      }
-    }
-
-    @Override
-    public String getBootstrapBasePath() {
-      return bootstrapBasePath;
-    }
-
-    @Override
-    public Map<HoodieFileGroupId, BootstrapFileMapping> 
getSourceFileMappingForFileIds(
-        List<HoodieFileGroupId> ids) {
-      Map<HoodieFileGroupId, BootstrapFileMapping> result = new HashMap<>();
-      // Arrange input Keys in sorted order for 1 pass scan
-      List<HoodieFileGroupId> fileGroupIds = new ArrayList<>(ids);
-      Collections.sort(fileGroupIds);
-      try {
-        HFileReader reader = fileIdIndexReader();
-        reader.seekTo();
-        for (HoodieFileGroupId fileGroupId : fileGroupIds) {
-          Key lookupKey = new UTF8StringKey(getFileGroupKey(fileGroupId));
-          if (reader.seekTo(lookupKey) == HFileReader.SEEK_TO_FOUND) {
-            org.apache.hudi.io.hfile.KeyValue keyValue = 
reader.getKeyValue().get();
-            byte[] valBytes = IOUtils.copy(
-                keyValue.getBytes(), keyValue.getValueOffset(), 
keyValue.getValueLength());
-            HoodieBootstrapFilePartitionInfo fileInfo = 
TimelineMetadataUtils.deserializeAvroMetadata(valBytes,
-                HoodieBootstrapFilePartitionInfo.class);
-            BootstrapFileMapping mapping = new 
BootstrapFileMapping(bootstrapBasePath,
-                fileInfo.getBootstrapPartitionPath(), 
fileInfo.getPartitionPath(), fileInfo.getBootstrapFileStatus(),
-                fileGroupId.getFileId());
-            result.put(fileGroupId, mapping);
-          }
-        }
-      } catch (IOException ioe) {
-        throw new HoodieIOException(ioe.getMessage(), ioe);
-      }
-      return result;
-    }
-
-    @Override
-    public void close() {
-      try {
-        if (indexByPartitionReader != null) {
-          indexByPartitionReader.close();
-          indexByPartitionReader = null;
-        }
-        if (indexByFileIdReader != null) {
-          indexByFileIdReader.close();
-          indexByFileIdReader = null;
-        }
-      } catch (IOException ioe) {
-        throw new HoodieIOException(ioe.getMessage(), ioe);
-      }
-    }
-  }
-
-  /**
-   * HBase HFile reader based Index Reader.  This is deprecated.
-   */
-  public static class HBaseHFileBootstrapIndexReader extends 
BootstrapIndex.IndexReader {

Review Comment:
   Put this class into its own file.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to