This is an automated email from the ASF dual-hosted git repository.
yihua pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new f80fc25fe90 [HUDI-8233] Remove unused codepaths after
HoodieFileGroupReader migration (#13438)
f80fc25fe90 is described below
commit f80fc25fe906686694557b5d8d2f68f9b5c9215e
Author: Tim Brown <[email protected]>
AuthorDate: Mon Jul 7 16:12:31 2025 -0400
[HUDI-8233] Remove unused codepaths after HoodieFileGroupReader migration
(#13438)
---
.../java/org/apache/hudi/avro/HoodieAvroUtils.java | 2 -
.../common/table/log/HoodieFileSliceReader.java | 112 -------------------
.../hudi/common/table/log/LogFileIterator.java | 63 -----------
.../common/table/read/HoodieFileGroupReader.java | 3 +-
.../common/util/collection/CachingIterator.java | 39 -------
.../hudi/common/table/log/TestLogFileIterator.java | 120 ---------------------
6 files changed, 1 insertion(+), 338 deletions(-)
diff --git
a/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java
b/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java
index 53bef9cb238..d2a85c2a77d 100644
--- a/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java
+++ b/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java
@@ -1508,9 +1508,7 @@ public class HoodieAvroUtils {
if (populateMetaFields) {
return SpillableMapUtils.convertToHoodieRecordPayload((GenericRecord)
data,
payloadClass, preCombineField, withOperation);
- // Support HoodieFileSliceReader
} else if (simpleKeyGenFieldsOpt.isPresent()) {
- // TODO in HoodieFileSliceReader may partitionName=option#empty
return SpillableMapUtils.convertToHoodieRecordPayload((GenericRecord)
data,
payloadClass, preCombineField, simpleKeyGenFieldsOpt.get(),
withOperation, partitionNameOp, schemaWithoutMetaFields);
} else {
diff --git
a/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieFileSliceReader.java
b/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieFileSliceReader.java
deleted file mode 100644
index 698e041f1d8..00000000000
---
a/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieFileSliceReader.java
+++ /dev/null
@@ -1,112 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.hudi.common.table.log;
-
-import org.apache.hudi.common.config.TypedProperties;
-import org.apache.hudi.common.model.HoodiePayloadProps;
-import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieRecordMerger;
-import org.apache.hudi.common.util.Option;
-import org.apache.hudi.common.util.collection.ClosableIterator;
-import org.apache.hudi.common.util.collection.Pair;
-import org.apache.hudi.exception.HoodieIOException;
-import org.apache.hudi.io.storage.HoodieFileReader;
-import org.apache.hudi.keygen.BaseKeyGenerator;
-
-import org.apache.avro.Schema;
-
-import java.io.IOException;
-import java.util.Map;
-import java.util.Properties;
-
-public class HoodieFileSliceReader<T> extends LogFileIterator<T> {
- private final Option<HoodieFileReader> baseFileReader;
- private final Option<ClosableIterator<HoodieRecord>> baseFileIterator;
- private final Schema schema;
- private final Properties props;
-
- private final TypedProperties payloadProps = new TypedProperties();
- private final Option<Pair<String, String>> simpleKeyGenFieldsOpt;
- private final Option<BaseKeyGenerator> keyGeneratorOpt;
- Map<String, HoodieRecord> records;
- HoodieRecordMerger merger;
-
- public HoodieFileSliceReader(Option<HoodieFileReader> baseFileReader,
- HoodieMergedLogRecordScanner scanner, Schema
schema, String preCombineField, HoodieRecordMerger merger,
- Properties props, Option<Pair<String, String>>
simpleKeyGenFieldsOpt, Option<BaseKeyGenerator> keyGeneratorOpt) throws
IOException {
- super(scanner);
- this.baseFileReader = baseFileReader;
- if (baseFileReader.isPresent()) {
- this.baseFileIterator =
Option.of(baseFileReader.get().getRecordIterator(schema));
- } else {
- this.baseFileIterator = Option.empty();
- }
- this.schema = schema;
- this.merger = merger;
- if (preCombineField != null) {
-
payloadProps.setProperty(HoodiePayloadProps.PAYLOAD_ORDERING_FIELD_PROP_KEY,
preCombineField);
- }
- this.props = props;
- this.simpleKeyGenFieldsOpt = simpleKeyGenFieldsOpt;
- this.keyGeneratorOpt = keyGeneratorOpt;
- this.records = this.scanner.getRecords();
- }
-
- private boolean hasNextInternal() {
- while (baseFileIterator.isPresent() && baseFileIterator.get().hasNext()) {
- try {
- HoodieRecord currentRecord = baseFileIterator.get().next();
- String recordKey = currentRecord.getRecordKey(schema, keyGeneratorOpt);
- Option<HoodieRecord> logRecord = removeLogRecord(recordKey);
- if (!logRecord.isPresent()) {
- nextRecord =
currentRecord.wrapIntoHoodieRecordPayloadWithParams(schema, props,
simpleKeyGenFieldsOpt, scanner.isWithOperationField(),
- scanner.getPartitionNameOverride(), false, Option.empty());
- return true;
- }
- Option<Pair<HoodieRecord, Schema>> mergedRecordOpt =
merger.merge(currentRecord, schema, logRecord.get(), schema, payloadProps);
- if (mergedRecordOpt.isPresent()) {
- HoodieRecord<T> mergedRecord = (HoodieRecord<T>)
mergedRecordOpt.get().getLeft();
- nextRecord =
mergedRecord.wrapIntoHoodieRecordPayloadWithParams(schema, props,
simpleKeyGenFieldsOpt, scanner.isWithOperationField(),
- scanner.getPartitionNameOverride(), false, Option.empty());
- return true;
- }
- } catch (IOException e) {
- throw new HoodieIOException("Failed to
wrapIntoHoodieRecordPayloadWithParams: " + e.getMessage());
- }
- }
- return super.doHasNext();
- }
-
- @Override
- protected boolean doHasNext() {
- return hasNextInternal();
- }
-
- @Override
- public void close() {
- super.close();
- if (baseFileIterator.isPresent()) {
- baseFileIterator.get().close();
- }
- if (baseFileReader.isPresent()) {
- baseFileReader.get().close();
- }
- }
-}
diff --git
a/hudi-common/src/main/java/org/apache/hudi/common/table/log/LogFileIterator.java
b/hudi-common/src/main/java/org/apache/hudi/common/table/log/LogFileIterator.java
deleted file mode 100644
index 7331f7d3a9b..00000000000
---
a/hudi-common/src/main/java/org/apache/hudi/common/table/log/LogFileIterator.java
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.hudi.common.table.log;
-
-import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.util.Option;
-import org.apache.hudi.common.util.collection.CachingIterator;
-
-import java.util.Iterator;
-import java.util.Map;
-
-public class LogFileIterator<T> extends CachingIterator<HoodieRecord<T>> {
- HoodieMergedLogRecordScanner scanner;
- Map<String, HoodieRecord> records;
- Iterator<HoodieRecord> iterator;
-
- protected Option<HoodieRecord> removeLogRecord(String key) {
- return Option.ofNullable(records.remove(key));
- }
-
- public LogFileIterator(HoodieMergedLogRecordScanner scanner) {
- this.scanner = scanner;
- this.records = scanner.getRecords();
- }
-
- private boolean hasNextInternal() {
- if (iterator == null) {
- iterator = records.values().iterator();
- }
- if (iterator.hasNext()) {
- nextRecord = iterator.next();
- return true;
- }
- return false;
- }
-
- @Override
- protected boolean doHasNext() {
- return hasNextInternal();
- }
-
- @Override
- public void close() {
- scanner.close();
- }
-}
diff --git
a/hudi-common/src/main/java/org/apache/hudi/common/table/read/HoodieFileGroupReader.java
b/hudi-common/src/main/java/org/apache/hudi/common/table/read/HoodieFileGroupReader.java
index ee9b6fa2436..db72c8a31c0 100644
---
a/hudi-common/src/main/java/org/apache/hudi/common/table/read/HoodieFileGroupReader.java
+++
b/hudi-common/src/main/java/org/apache/hudi/common/table/read/HoodieFileGroupReader.java
@@ -37,7 +37,6 @@ import org.apache.hudi.common.util.ConfigUtils;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.StringUtils;
import org.apache.hudi.common.util.ValidationUtils;
-import org.apache.hudi.common.util.collection.CachingIterator;
import org.apache.hudi.common.util.collection.ClosableIterator;
import org.apache.hudi.common.util.collection.CloseableMappingIterator;
import org.apache.hudi.common.util.collection.EmptyIterator;
@@ -196,7 +195,7 @@ public final class HoodieFileGroupReader<T> implements
Closeable {
private void initRecordIterators() throws IOException {
ClosableIterator<T> iter = makeBaseFileIterator();
if (logFiles.isEmpty()) {
- this.baseFileIterator = CachingIterator.wrap(iter, readerContext);
+ this.baseFileIterator = new CloseableMappingIterator<>(iter,
readerContext::seal);
} else {
this.baseFileIterator = iter;
scanLogFiles();
diff --git
a/hudi-common/src/main/java/org/apache/hudi/common/util/collection/CachingIterator.java
b/hudi-common/src/main/java/org/apache/hudi/common/util/collection/CachingIterator.java
index 58d5aa78709..593fbad61e1 100644
---
a/hudi-common/src/main/java/org/apache/hudi/common/util/collection/CachingIterator.java
+++
b/hudi-common/src/main/java/org/apache/hudi/common/util/collection/CachingIterator.java
@@ -19,8 +19,6 @@
package org.apache.hudi.common.util.collection;
-import org.apache.hudi.common.engine.HoodieReaderContext;
-
public abstract class CachingIterator<T> implements ClosableIterator<T> {
protected T nextRecord;
@@ -38,41 +36,4 @@ public abstract class CachingIterator<T> implements
ClosableIterator<T> {
nextRecord = null;
return record;
}
-
- public static <U> CachingIterator<U> wrap(ClosableIterator<U> iterator) {
- return new CachingIterator<U>() {
- @Override
- protected boolean doHasNext() {
- if (iterator.hasNext()) {
- nextRecord = iterator.next();
- return true;
- }
- return false;
- }
-
- @Override
- public void close() {
- iterator.close();
- }
- };
- }
-
- public static <U> CachingIterator<U> wrap(ClosableIterator<U> iterator,
HoodieReaderContext<U> readerContext) {
- return new CachingIterator<U>() {
- @Override
- protected boolean doHasNext() {
- if (iterator.hasNext()) {
- nextRecord = readerContext.seal(iterator.next());
- return true;
- }
- return false;
- }
-
- @Override
- public void close() {
- iterator.close();
- }
- };
- }
-
}
diff --git
a/hudi-common/src/test/java/org/apache/hudi/common/table/log/TestLogFileIterator.java
b/hudi-common/src/test/java/org/apache/hudi/common/table/log/TestLogFileIterator.java
deleted file mode 100644
index 3ec2d132966..00000000000
---
a/hudi-common/src/test/java/org/apache/hudi/common/table/log/TestLogFileIterator.java
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.hudi.common.table.log;
-
-import org.apache.hudi.common.model.HoodieAvroRecord;
-import org.apache.hudi.common.model.HoodieKey;
-import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.serialization.DefaultSerializer;
-import org.apache.hudi.common.testutils.HoodieTestDataGenerator;
-import org.apache.hudi.common.util.DefaultSizeEstimator;
-import org.apache.hudi.common.util.HoodieRecordSizeEstimator;
-import org.apache.hudi.common.util.collection.ExternalSpillableMap;
-
-import org.junit.jupiter.api.Test;
-import org.mockito.Mockito;
-
-import java.io.IOException;
-import java.util.HashMap;
-import java.util.Map;
-
-import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.junit.jupiter.api.Assertions.assertFalse;
-import static org.junit.jupiter.api.Assertions.assertNotNull;
-import static org.junit.jupiter.api.Assertions.assertTrue;
-import static org.mockito.Mockito.when;
-
-public class TestLogFileIterator {
-
- @Test
- public void testIteratorWithPlainHashMap() {
- // 1) Create a simple in-memory Map of HoodieRecords
- Map<String, HoodieRecord> inMemoryMap = new HashMap<>();
- HoodieRecord record1 = new HoodieAvroRecord(new HoodieKey("key1", "p1"),
null);
- HoodieRecord record2 = new HoodieAvroRecord(new HoodieKey("key2", "p2"),
null);
- inMemoryMap.put("key1", record1);
- inMemoryMap.put("key2", record2);
-
- // 2) Mock a scanner that returns the inMemoryMap
- HoodieMergedLogRecordScanner scanner =
Mockito.mock(HoodieMergedLogRecordScanner.class);
- when(scanner.getRecords()).thenReturn(inMemoryMap);
-
- // 3) Create the LogFileIterator using the new fix
- LogFileIterator<HoodieRecord> iterator = new LogFileIterator<>(scanner);
-
- // 4) Validate iteration (should use records.values().iterator())
- assertTrue(iterator.hasNext());
- HoodieRecord next1 = iterator.next();
- assertNotNull(next1);
-
- assertTrue(iterator.hasNext());
- HoodieRecord next2 = iterator.next();
- assertNotNull(next2);
-
- assertFalse(iterator.hasNext());
- iterator.close();
- }
-
- @Test
- public void testIteratorWithExternalSpillableMap() throws Exception {
- // 1) Create ExternalSpillableMap
- ExternalSpillableMap<String, HoodieRecord> spillableMap =
getSpillableRecordMap();
-
- // 3) Mock a scanner that returns the ExternalSpillableMap
- HoodieMergedLogRecordScanner scanner =
Mockito.mock(HoodieMergedLogRecordScanner.class);
- when(scanner.getRecords()).thenReturn(spillableMap);
-
- // 4) Create the LogFileIterator (with the fix)
- LogFileIterator<HoodieRecord> iterator = new LogFileIterator<>(scanner);
-
- // 5) Validate iteration (should use ExternalSpillableMap.iterator(), not
values().iterator())
- int count = 0;
- while (iterator.hasNext()) {
- HoodieRecord rec = iterator.next();
- assertNotNull(rec);
- count++;
- }
- // We expect 2 records
- assertEquals(2, count);
-
- iterator.close();
- }
-
- private static ExternalSpillableMap<String, HoodieRecord>
getSpillableRecordMap() throws IOException {
- ExternalSpillableMap<String, HoodieRecord> spillableMap =
- new ExternalSpillableMap<>(
- 1L, // maxInMemorySizeInBytes
- "/tmp", // basePathForSpill
- new DefaultSizeEstimator(),
- new HoodieRecordSizeEstimator(HoodieTestDataGenerator.AVRO_SCHEMA),
- ExternalSpillableMap.DiskMapType.BITCASK,
- new DefaultSerializer<>(),
- false,
- TestLogFileIterator.class.getSimpleName()
- );
-
- // 2) Put some records in the spillable map
- HoodieRecord recordA = new HoodieAvroRecord(new HoodieKey("keyA", "p1"),
null);
- HoodieRecord recordB = new HoodieAvroRecord(new HoodieKey("keyB", "p2"),
null);
- spillableMap.put("keyA", recordA);
- spillableMap.put("keyB", recordB);
- return spillableMap;
- }
-}