This is an automated email from the ASF dual-hosted git repository. hope pushed a commit to branch release-1.4 in repository https://gitbox.apache.org/repos/asf/paimon.git
commit 18003d5c158e79f596f6121e9cae6ff9c8d19dc6 Author: Jingsong Lee <[email protected]> AuthorDate: Wed Mar 25 16:25:02 2026 +0800 [core] Do not create GlobalIndexScanner when no index files (#7521) Bypass `GlobalIndexScanner` for better performance. --- .../paimon/globalindex/DataEvolutionBatchScan.java | 9 +++++++-- .../paimon/globalindex/GlobalIndexScanner.java | 21 +++++++++++--------- .../apache/paimon/table/source/VectorReadImpl.java | 8 +++++--- .../paimon/table/BitmapGlobalIndexTableTest.java | 2 +- .../paimon/table/BtreeGlobalIndexTableTest.java | 23 +--------------------- .../pypaimon/globalindex/global_index_scanner.py | 6 +++++- .../pypaimon/read/scanner/file_scanner.py | 2 ++ 7 files changed, 33 insertions(+), 38 deletions(-) diff --git a/paimon-core/src/main/java/org/apache/paimon/globalindex/DataEvolutionBatchScan.java b/paimon-core/src/main/java/org/apache/paimon/globalindex/DataEvolutionBatchScan.java index af169f4c6d..0542b7d95f 100644 --- a/paimon-core/src/main/java/org/apache/paimon/globalindex/DataEvolutionBatchScan.java +++ b/paimon-core/src/main/java/org/apache/paimon/globalindex/DataEvolutionBatchScan.java @@ -267,8 +267,13 @@ public class DataEvolutionBatchScan implements DataTableScan { } PartitionPredicate partitionFilter = batchScan.snapshotReader().manifestsReader().partitionFilter(); - try (GlobalIndexScanner scanner = - GlobalIndexScanner.create(table, partitionFilter, filter)) { + Optional<GlobalIndexScanner> optionalScanner = + GlobalIndexScanner.create(table, partitionFilter, filter); + if (!optionalScanner.isPresent()) { + return Optional.empty(); + } + + try (GlobalIndexScanner scanner = optionalScanner.get()) { return scanner.scan(filter); } catch (IOException e) { throw new RuntimeException(e); diff --git a/paimon-core/src/main/java/org/apache/paimon/globalindex/GlobalIndexScanner.java b/paimon-core/src/main/java/org/apache/paimon/globalindex/GlobalIndexScanner.java index 1312f17b43..3092f1b3da 100644 --- a/paimon-core/src/main/java/org/apache/paimon/globalindex/GlobalIndexScanner.java +++ b/paimon-core/src/main/java/org/apache/paimon/globalindex/GlobalIndexScanner.java @@ -97,17 +97,21 @@ public class GlobalIndexScanner implements Closeable { this.globalIndexEvaluator = new GlobalIndexEvaluator(rowType, readersFunction); } - public static GlobalIndexScanner create( + public static Optional<GlobalIndexScanner> create( FileStoreTable table, Collection<IndexFileMeta> indexFiles) { - return new GlobalIndexScanner( - table.coreOptions().toConfiguration(), - table.rowType(), - table.fileIO(), - table.store().pathFactory().globalIndexFileFactory(), - indexFiles); + if (indexFiles.isEmpty()) { + return Optional.empty(); + } + return Optional.of( + new GlobalIndexScanner( + table.coreOptions().toConfiguration(), + table.rowType(), + table.fileIO(), + table.store().pathFactory().globalIndexFileFactory(), + indexFiles)); } - public static GlobalIndexScanner create( + public static Optional<GlobalIndexScanner> create( FileStoreTable table, PartitionPredicate partitionFilter, Predicate filter) { Set<Integer> filterFieldIds = collectFieldNames(filter).stream() @@ -131,7 +135,6 @@ public class GlobalIndexScanner implements Closeable { .stream() .map(IndexManifestEntry::indexFile) .collect(Collectors.toList()); - return create(table, indexFiles); } diff --git a/paimon-core/src/main/java/org/apache/paimon/table/source/VectorReadImpl.java b/paimon-core/src/main/java/org/apache/paimon/table/source/VectorReadImpl.java index 49708a0a2a..6971bb9084 100644 --- a/paimon-core/src/main/java/org/apache/paimon/table/source/VectorReadImpl.java +++ b/paimon-core/src/main/java/org/apache/paimon/table/source/VectorReadImpl.java @@ -119,11 +119,13 @@ public class VectorReadImpl implements VectorRead { for (VectorSearchSplit split : splits) { scalarIndexFiles.addAll(split.scalarIndexFiles()); } - if (scalarIndexFiles.isEmpty()) { + + Optional<GlobalIndexScanner> optionalScanner = + GlobalIndexScanner.create(table, scalarIndexFiles); + if (!optionalScanner.isPresent()) { return Optional.empty(); } - - try (GlobalIndexScanner scanner = GlobalIndexScanner.create(table, scalarIndexFiles)) { + try (GlobalIndexScanner scanner = optionalScanner.get()) { return scanner.scan(filter).map(GlobalIndexResult::results); } catch (IOException e) { throw new RuntimeException(e); diff --git a/paimon-core/src/test/java/org/apache/paimon/table/BitmapGlobalIndexTableTest.java b/paimon-core/src/test/java/org/apache/paimon/table/BitmapGlobalIndexTableTest.java index cd888e6982..6443c6a5ec 100644 --- a/paimon-core/src/test/java/org/apache/paimon/table/BitmapGlobalIndexTableTest.java +++ b/paimon-core/src/test/java/org/apache/paimon/table/BitmapGlobalIndexTableTest.java @@ -248,7 +248,7 @@ public class BitmapGlobalIndexTableTest extends DataEvolutionTestBase { private RoaringNavigableMap64 globalIndexScan(FileStoreTable table, Predicate predicate) throws Exception { try (GlobalIndexScanner scanner = - GlobalIndexScanner.create(table, PartitionPredicate.ALWAYS_TRUE, predicate)) { + GlobalIndexScanner.create(table, PartitionPredicate.ALWAYS_TRUE, predicate).get()) { return scanner.scan(predicate).get().results(); } } diff --git a/paimon-core/src/test/java/org/apache/paimon/table/BtreeGlobalIndexTableTest.java b/paimon-core/src/test/java/org/apache/paimon/table/BtreeGlobalIndexTableTest.java index 98f730359d..4be621942b 100644 --- a/paimon-core/src/test/java/org/apache/paimon/table/BtreeGlobalIndexTableTest.java +++ b/paimon-core/src/test/java/org/apache/paimon/table/BtreeGlobalIndexTableTest.java @@ -19,7 +19,6 @@ package org.apache.paimon.table; import org.apache.paimon.data.BinaryString; -import org.apache.paimon.data.GenericRow; import org.apache.paimon.globalindex.DataEvolutionBatchScan; import org.apache.paimon.globalindex.GlobalIndexResult; import org.apache.paimon.globalindex.GlobalIndexScanner; @@ -29,13 +28,10 @@ import org.apache.paimon.partition.PartitionPredicate; import org.apache.paimon.predicate.Predicate; import org.apache.paimon.predicate.PredicateBuilder; import org.apache.paimon.table.sink.BatchTableCommit; -import org.apache.paimon.table.sink.BatchTableWrite; -import org.apache.paimon.table.sink.BatchWriteBuilder; import org.apache.paimon.table.sink.CommitMessage; import org.apache.paimon.table.source.DataSplit; import org.apache.paimon.table.source.ReadBuilder; import org.apache.paimon.table.source.Split; -import org.apache.paimon.types.RowType; import org.apache.paimon.utils.Range; import org.apache.paimon.utils.RoaringNavigableMap64; @@ -206,27 +202,10 @@ public class BtreeGlobalIndexTableTest extends DataEvolutionTestBase { .collect(Collectors.toList()); } - private void append(int startInclusive, int endExclusive) throws Exception { - BatchWriteBuilder builder = getTableDefault().newBatchWriteBuilder(); - RowType writeType = schemaDefault().rowType(); - try (BatchTableWrite write0 = builder.newWrite().withWriteType(writeType)) { - for (int i = startInclusive; i < endExclusive; i++) { - write0.write( - GenericRow.of( - i, - BinaryString.fromString("a" + i), - BinaryString.fromString("b" + i))); - } - try (BatchTableCommit commit = builder.newCommit()) { - commit.commit(write0.prepareCommit()); - } - } - } - private RoaringNavigableMap64 globalIndexScan(FileStoreTable table, Predicate predicate) throws Exception { try (GlobalIndexScanner scanner = - GlobalIndexScanner.create(table, PartitionPredicate.ALWAYS_TRUE, predicate)) { + GlobalIndexScanner.create(table, PartitionPredicate.ALWAYS_TRUE, predicate).get()) { return scanner.scan(predicate).get().results(); } } diff --git a/paimon-python/pypaimon/globalindex/global_index_scanner.py b/paimon-python/pypaimon/globalindex/global_index_scanner.py index c3d144811c..515600d77f 100644 --- a/paimon-python/pypaimon/globalindex/global_index_scanner.py +++ b/paimon-python/pypaimon/globalindex/global_index_scanner.py @@ -76,7 +76,7 @@ class GlobalIndexScanner: return GlobalIndexEvaluator(fields, readers_function) @staticmethod - def create(table, index_files=None, partition_filter=None, predicate=None): + def create(table, index_files=None, partition_filter=None, predicate=None) -> Optional['GlobalIndexScanner']: """Create a GlobalIndexScanner. Can be called in two ways: @@ -86,6 +86,8 @@ class GlobalIndexScanner: from pypaimon.index.index_file_handler import IndexFileHandler if index_files is not None: + if len(index_files) == 0: + return None return GlobalIndexScanner( options=table.table_schema.options, fields=table.fields, @@ -117,6 +119,8 @@ class GlobalIndexScanner: entries = index_file_handler.scan(snapshot, index_file_filter) scanned_index_files = [entry.index_file for entry in entries] + if len(scanned_index_files) == 0: + return None return GlobalIndexScanner( options=table.table_schema.options, fields=table.fields, diff --git a/paimon-python/pypaimon/read/scanner/file_scanner.py b/paimon-python/pypaimon/read/scanner/file_scanner.py index b770b15916..a5e8576802 100755 --- a/paimon-python/pypaimon/read/scanner/file_scanner.py +++ b/paimon-python/pypaimon/read/scanner/file_scanner.py @@ -313,6 +313,8 @@ class FileScanner: partition_filter=self.partition_key_predicate, predicate=self.predicate ) + if scanner is None: + return None with scanner: return scanner.scan(self.predicate) except Exception:
