hudi-agent commented on code in PR #18919:
URL: https://github.com/apache/hudi/pull/18919#discussion_r3368463610
##########
hudi-hadoop-common/src/test/java/org/apache/hudi/io/hadoop/TestHoodieAvroFileReaderFactory.java:
##########
@@ -70,4 +84,56 @@ public void testGetFileReader() throws IOException {
.getFileReader(DEFAULT_HUDI_CONFIG_FOR_READER, orcPath);
assertTrue(orcReader instanceof HoodieAvroOrcReader);
}
+
+ @ParameterizedTest
+ @ValueSource(booleans = {true, false})
+ public void testHFileReaderPassesBloomFilterConfig(boolean
bloomFilterEnabled)
+ throws IOException, ReflectiveOperationException {
+ HoodieStorage storage = HoodieTestUtils.getDefaultStorage();
+ HoodieFileReaderFactory readerFactory =
HoodieIOFactory.getIOFactory(storage)
+ .getReaderFactory(HoodieRecordType.AVRO);
+ HoodieMetadataConfig metadataConfig = HoodieMetadataConfig.newBuilder()
+ .withProperties(DEFAULT_HUDI_CONFIG_FOR_READER.getProps())
+ .withProperties(bloomFilterProps(bloomFilterEnabled))
+ .build();
+
+ assertBloomFilterConfig(readerFactory.getFileReader(metadataConfig,
HFILE_PATH),
+ bloomFilterEnabled);
+ assertBloomFilterConfig(readerFactory.getFileReader(metadataConfig,
+ new StoragePathInfo(HFILE_PATH, 100, false, (short) 0, 0, 0),
HoodieFileFormat.HFILE,
+ Option.<HoodieSchema>empty()), bloomFilterEnabled);
+ assertBloomFilterConfig(readerFactory.getContentReader(metadataConfig,
HFILE_PATH, HoodieFileFormat.HFILE,
+ storage, new byte[0], Option.<HoodieSchema>empty()),
+ bloomFilterEnabled);
+ }
+
+ @Test
+ public void testHFileReaderDoesNotUseBloomFilterByDefault()
+ throws IOException, ReflectiveOperationException {
+ HoodieStorage storage = HoodieTestUtils.getDefaultStorage();
+ HoodieFileReaderFactory readerFactory =
HoodieIOFactory.getIOFactory(storage)
+ .getReaderFactory(HoodieRecordType.AVRO);
+
+
assertBloomFilterConfig(readerFactory.getFileReader(DEFAULT_HUDI_CONFIG_FOR_READER,
HFILE_PATH), false);
+
assertBloomFilterConfig(readerFactory.getFileReader(DEFAULT_HUDI_CONFIG_FOR_READER,
+ new StoragePathInfo(HFILE_PATH, 100, false, (short) 0, 0, 0),
HoodieFileFormat.HFILE,
+ Option.<HoodieSchema>empty()), false);
+
assertBloomFilterConfig(readerFactory.getContentReader(DEFAULT_HUDI_CONFIG_FOR_READER,
HFILE_PATH,
+ HoodieFileFormat.HFILE, storage, new byte[0],
Option.<HoodieSchema>empty()),
+ false);
+ }
+
+ private static TypedProperties bloomFilterProps(boolean enableBloomFilter) {
+ TypedProperties properties = new TypedProperties();
+ properties.setProperty(HoodieMetadataConfig.BLOOM_FILTER_ENABLE.key(),
Boolean.toString(enableBloomFilter));
+ return properties;
+ }
+
+ private static void assertBloomFilterConfig(HoodieFileReader reader, boolean
expectedBloomFilterEnabled)
+ throws ReflectiveOperationException {
+ HoodieNativeAvroHFileReader hfileReader =
assertInstanceOf(HoodieNativeAvroHFileReader.class, reader);
Review Comment:
🤖 nit: `getDeclaredField("useBloomFilter")` ties the test to the private
field name as a string — if `useBloomFilter` is ever renamed in
`HoodieNativeAvroHFileReader`, this breaks at runtime rather than at compile
time. Have you considered adding a package-private or `@VisibleForTesting`
getter on the reader instead?
<sub><i>- AI-generated; verify before applying. React 👍/👎 to flag
quality.</i></sub>
##########
hudi-hadoop-common/src/main/java/org/apache/hudi/io/storage/hadoop/HoodieAvroFileReaderFactory.java:
##########
@@ -73,8 +72,19 @@ protected HoodieFileReader newHFileFileReader(HoodieConfig
hoodieConfig,
HFileReaderFactory.Builder readerFactoryBuilder =
HFileReaderFactory.builder()
.withStorage(storage).withProps(hoodieConfig.getProps())
.withContent(content);
+ return newNativeHFileReader(hoodieConfig, readerFactoryBuilder.build(),
path, schemaOption);
+ }
+
+ private HoodieNativeAvroHFileReader newNativeHFileReader(HoodieConfig
hoodieConfig,
Review Comment:
🤖 nit: `newNativeHFileReader` only reads one field from `HoodieConfig` (the
bloom filter boolean), so the full config object is a bit opaque here — could
you pass `boolean useBloomFilter` directly instead? That makes the method
signature self-documenting about what it actually needs.
<sub><i>- AI-generated; verify before applying. React 👍/👎 to flag
quality.</i></sub>
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]