manojpec commented on a change in pull request #4352: URL: https://github.com/apache/hudi/pull/4352#discussion_r793427314
##########
File path: hudi-common/src/main/java/org/apache/hudi/metadata/BaseTableMetadata.java
##########
@@ -63,7 +72,9 @@
   // Directory used for Spillable Map when merging records
   protected final String spillableMapDirectory;
-  protected boolean enabled;
+  protected boolean isMetadataTableEnabled;
+  protected boolean isMetaIndexBloomFilterEnabled = false;

Review comment:
This is the base table. This is set in the derived class during the initialization. More changes around this will come as part of HUDI-3142.

##########
File path: hudi-common/src/main/java/org/apache/hudi/metadata/BaseTableMetadata.java
##########
@@ -146,12 +156,119 @@ protected BaseTableMetadata(HoodieEngineContext engineContext, HoodieMetadataCon
         .getAllFilesInPartitions(partitions);
   }
+  @Override
+  public Option<ByteBuffer> getBloomFilter(final String partitionName, final String fileName)
+      throws HoodieMetadataException {
+    if (!isMetaIndexBloomFilterEnabled) {
+      LOG.error("Meta index for bloom filters is disabled!");
+      return Option.empty();
+    }
+
+    final Pair<String, String> partitionFileName = Pair.of(partitionName, fileName);
+    Map<Pair<String, String>, ByteBuffer> bloomFilters = getBloomFilters(Collections.singletonList(partitionFileName));
+    if (bloomFilters.isEmpty()) {
+      LOG.error("Meta index: missing bloom filter for partition: " + partitionName + ", file: " + fileName);
+      return Option.empty();
+    }
+
+    ValidationUtils.checkState(bloomFilters.containsKey(partitionFileName));
+    return Option.of(bloomFilters.get(partitionFileName));
+  }
+
+  @Override
+  public Map<Pair<String, String>, ByteBuffer> getBloomFilters(final List<Pair<String, String>> partitionNameFileNameList)
+      throws HoodieMetadataException {
+    if (!isMetaIndexBloomFilterEnabled) {
+      LOG.error("Meta index for bloom filter is disabled!");
+      return Collections.emptyMap();
+    }
+
+    HoodieTimer timer = new HoodieTimer().startTimer();
+    Set<String> partitionIDFileIDSortedStrings = new TreeSet<>();
+    Map<String, Pair<String, String>> fileToKeyMap = new HashMap<>();
+    partitionNameFileNameList.forEach(partitionNameFileNamePair -> {
+          final String bloomKey = new PartitionIndexID(partitionNameFileNamePair.getLeft()).asBase64EncodedString()
+              .concat(new FileIndexID(partitionNameFileNamePair.getRight()).asBase64EncodedString());
+          partitionIDFileIDSortedStrings.add(bloomKey);
+          fileToKeyMap.put(bloomKey, partitionNameFileNamePair);
+        }
+    );
+
+    List<String> partitionIDFileIDStrings = new ArrayList<>(partitionIDFileIDSortedStrings);
+    List<Pair<String, Option<HoodieRecord<HoodieMetadataPayload>>>> hoodieRecordList =
+        getRecordsByKeys(partitionIDFileIDStrings, MetadataPartitionType.BLOOM_FILTERS.getPartitionPath());
+    metrics.ifPresent(m -> m.updateMetrics(HoodieMetadataMetrics.LOOKUP_BLOOM_FILTERS_METADATA_STR, timer.endTimer()));

Review comment:
fixed

--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org

For queries about this service, please contact Infrastructure at: us...@infra.apache.org