This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 039ac956206 [enhance](multi-catalog) Split metadata scan ranges per split (#60257)
039ac956206 is described below

commit 039ac9562069633bff403e00eae0ea28fce734fe
Author: Socrates <[email protected]>
AuthorDate: Thu Jan 29 17:04:54 2026 +0800

    [enhance](multi-catalog) Split metadata scan ranges per split (#60257)
    
    ### What problem does this PR solve?
    
    - Related PR: #54804
    
    Metadata scans (e.g., Iceberg `all_files`) can produce many splits, but
    the prior logic grouped them into at most one scan range per backend,
    which capped `NumScanners` and `MaxScanConcurrency` at 1 in common
    cases. This change aligns metadata scans with the per-split assignment
    used by file scans, enabling higher parallelism.
---
 .../doris/datasource/tvf/source/MetadataScanNode.java   | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/tvf/source/MetadataScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/tvf/source/MetadataScanNode.java
index ef84782d1db..87fd0e58aa9 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/tvf/source/MetadataScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/tvf/source/MetadataScanNode.java
@@ -90,23 +90,18 @@ public class MetadataScanNode extends ExternalScanNode {
             scanRangeLocations.add(locations);
         } else {
             // need to split ranges to send to backends
-            List<Backend> backends = 
Lists.newArrayList(backendPolicy.getBackends());
             List<String> splits = metaScanRange.getSerializedSplits();
-            int numSplitsPerBE = Math.max(1, splits.size() / backends.size());
+            int maxConcurrency = 
ConnectContext.get().getSessionVariable().getMaxScannersConcurrency();
+            int targetRanges = backendPolicy.numBackends() * Math.max(1, 
maxConcurrency);
+            int splitsPerRange = (int) Math.ceil((double) splits.size() / 
targetRanges);
+            for (int from = 0; from < splits.size(); from += splitsPerRange) {
+                int to = Math.min(from + splitsPerRange, splits.size());
+                Backend backend = backendPolicy.getNextBe();
 
-            for (int i = 0; i < backends.size(); i++) {
-                int from = i * numSplitsPerBE;
-                if (from >= splits.size()) {
-                    continue; // no splits for this backend
-                }
-                int to = Math.min((i + 1) * numSplitsPerBE, splits.size());
-
-                // set splited task to TMetaScanRange
                 TMetaScanRange subRange = metaScanRange.deepCopy();
                 subRange.setSerializedSplits(splits.subList(from, to));
 
                 TScanRangeLocation location = new TScanRangeLocation();
-                Backend backend = backends.get(i);
                 location.setBackendId(backend.getId());
                 location.setServer(new TNetworkAddress(backend.getHost(), 
backend.getBePort()));
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to