manojpec commented on a change in pull request #4352:
URL: https://github.com/apache/hudi/pull/4352#discussion_r781019171



##########
File path: 
hudi-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataPayload.java
##########
@@ -109,55 +194,92 @@ private HoodieMetadataPayload(String key, int type, 
Map<String, HoodieMetadataFi
    */
   public static HoodieRecord<HoodieMetadataPayload> 
createPartitionListRecord(List<String> partitions) {
     Map<String, HoodieMetadataFileInfo> fileInfo = new HashMap<>();
-    partitions.forEach(partition -> fileInfo.put(partition, new 
HoodieMetadataFileInfo(0L,  false)));
+    partitions.forEach(partition -> fileInfo.put(partition, new 
HoodieMetadataFileInfo(0L, false)));
 
     HoodieKey key = new HoodieKey(RECORDKEY_PARTITION_LIST, 
MetadataPartitionType.FILES.partitionPath());
-    HoodieMetadataPayload payload = new 
HoodieMetadataPayload(key.getRecordKey(), PARTITION_LIST, fileInfo);
+    HoodieMetadataPayload payload = new 
HoodieMetadataPayload(key.getRecordKey(), METADATA_TYPE_PARTITION_LIST,
+        fileInfo);
     return new HoodieRecord<>(key, payload);
   }
 
   /**
    * Create and return a {@code HoodieMetadataPayload} to save list of files 
within a partition.
    *
-   * @param partition The name of the partition
-   * @param filesAdded Mapping of files to their sizes for files which have 
been added to this partition
+   * @param partition    The name of the partition
+   * @param filesAdded   Mapping of files to their sizes for files which have 
been added to this partition
    * @param filesDeleted List of files which have been deleted from this 
partition
    */
   public static HoodieRecord<HoodieMetadataPayload> 
createPartitionFilesRecord(String partition,
-                                                                               
Option<Map<String, Long>> filesAdded, Option<List<String>> filesDeleted) {
+                                                                               
Option<Map<String, Long>> filesAdded,
+                                                                               
Option<List<String>> filesDeleted) {
     Map<String, HoodieMetadataFileInfo> fileInfo = new HashMap<>();
     filesAdded.ifPresent(
         m -> m.forEach((filename, size) -> fileInfo.put(filename, new 
HoodieMetadataFileInfo(size, false))));
     filesDeleted.ifPresent(
-        m -> m.forEach(filename -> fileInfo.put(filename, new 
HoodieMetadataFileInfo(0L,  true))));
+        m -> m.forEach(filename -> fileInfo.put(filename, new 
HoodieMetadataFileInfo(0L, true))));
 
     HoodieKey key = new HoodieKey(partition, 
MetadataPartitionType.FILES.partitionPath());
-    HoodieMetadataPayload payload = new 
HoodieMetadataPayload(key.getRecordKey(), FILE_LIST, fileInfo);
+    HoodieMetadataPayload payload = new 
HoodieMetadataPayload(key.getRecordKey(), METADATA_TYPE_FILE_LIST, fileInfo);
     return new HoodieRecord<>(key, payload);
   }
 
+  /**
+   * Create bloom filter metadata record.
+   *
+   * @param fileID      - FileID for which the bloom filter needs to persisted
+   * @param timestamp   - Instant timestamp responsible for this record
+   * @param bloomFilter - Bloom filter for the File
+   * @param isDeleted   - Is the bloom filter no more valid
+   * @return Metadata payload containing the fileID and its bloom filter record
+   */
+  public static HoodieRecord<HoodieMetadataPayload> 
createBloomFilterMetadataRecord(final PartitionIndexID partitionID,
+                                                                               
     final FileIndexID fileID,
+                                                                               
     final String timestamp,
+                                                                               
     final ByteBuffer bloomFilter,
+                                                                               
     final boolean isDeleted) {
+    final String bloomFilterKey = 
partitionID.asBase64EncodedString().concat(fileID.asBase64EncodedString());
+    HoodieKey key = new HoodieKey(bloomFilterKey, 
MetadataPartitionType.BLOOM_FILTERS.partitionPath());
+
+    // TODO: Get the bloom filter type from the file
+    HoodieMetadataBloomFilter metadataBloomFilter =
+        new HoodieMetadataBloomFilter(BloomFilterTypeCode.DYNAMIC_V0.name(),
+            timestamp, bloomFilter, isDeleted, ByteBuffer.allocate(0));
+    HoodieMetadataPayload metadataPayload = new 
HoodieMetadataPayload(key.getRecordKey(),
+        HoodieMetadataPayload.METADATA_TYPE_BLOOM_FILTER, metadataBloomFilter);
+    return new HoodieRecord<>(key, metadataPayload);
+  }
+
   @Override
   public HoodieMetadataPayload preCombine(HoodieMetadataPayload 
previousRecord) {
     ValidationUtils.checkArgument(previousRecord.type == type,
-        "Cannot combine " + previousRecord.type  + " with " + type);
-
-    Map<String, HoodieMetadataFileInfo> combinedFileInfo = null;
+        "Cannot combine " + previousRecord.type + " with " + type);
 
     switch (type) {
-      case PARTITION_LIST:
-      case FILE_LIST:
-        combinedFileInfo = combineFilesystemMetadata(previousRecord);
-        break;
+      case METADATA_TYPE_PARTITION_LIST:
+      case METADATA_TYPE_FILE_LIST:
+        Map<String, HoodieMetadataFileInfo> combinedFileInfo = 
combineFilesystemMetadata(previousRecord);
+        return new HoodieMetadataPayload(key, type, combinedFileInfo);
+      case METADATA_TYPE_BLOOM_FILTER:
+        HoodieMetadataBloomFilter combineBloomFilterMetadata = 
combineBloomFilterMetadata(previousRecord);
+        return new HoodieMetadataPayload(key, type, 
combineBloomFilterMetadata);
+      case METADATA_TYPE_COLUMN_STATS:
+        return new HoodieMetadataPayload(key, type, 
combineColumnStats(previousRecord));
       default:
         throw new HoodieMetadataException("Unknown type of 
HoodieMetadataPayload: " + type);
     }
+  }
 
-    return new HoodieMetadataPayload(key, type, combinedFileInfo);
+  private HoodieMetadataBloomFilter 
combineBloomFilterMetadata(HoodieMetadataPayload previousRecord) {
+    return this.bloomFilterMetadata;

Review comment:
       Right, we always go with the latest bloom filter version. I have not 
found a reason to merge the filters so far.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to