yiguolei commented on code in PR #44690:
URL: https://github.com/apache/doris/pull/44690#discussion_r1926416036


##########
be/src/vec/exec/scan/scanner_context.cpp:
##########
@@ -147,87 +153,72 @@ Status ScannerContext::init() {
         }
     }
 
-    // _scannner_scheduler will be used to submit scan task.
-    // file_scan_operator currentlly has performance issue if we submit too 
many scan tasks to scheduler.
-    // we should fix this problem in the future.
-    if (_scanner_scheduler->get_queue_size() * 2 > 
config::doris_scanner_thread_pool_queue_size ||
-        _is_file_scan_operator) {
-        submit_many_scan_tasks_for_potential_performance_issue = false;
-    }
-
-    // _max_thread_num controls how many scanners of this ScanOperator can be 
submitted to scheduler at a time.
     // The overall target of our system is to make full utilization of the 
resources.
     // At the same time, we dont want too many tasks are queued by scheduler, 
that is not necessary.
-    // So, first of all, we try to make sure _max_thread_num of a ScanNode of 
a query on a single backend is less than
-    // 2 * config::doris_scanner_thread_pool_thread_num, so that we can make 
all io threads busy.
-    // For example, on a 64-core machine, the default value of 
config::doris_scanner_thread_pool_thread_num will be 64*2 =128.
-    // and the num_parallel_instances of this scan operator will be 64/2=32.
-    // For a query who has one scan nodes, the _max_thread_num of each scan 
node instance will be 4 * 128 / 32 = 16.
-    // We have 32 instances of this scan operator, so for the ScanNode, we 
have 16 * 32 = 8 * 64 = 512 scanner tasks can be submitted at a time.
-    _max_thread_num = _state->num_scanner_threads() > 0 ? 
_state->num_scanner_threads() : 0;
-
-    if (_max_thread_num == 0) {
-        // NOTE: When ignore_data_distribution is true, the parallelism
-        // of the scan operator is regarded as 1 (actually maybe not).
-        // That will make the number of scan task can be submitted to the 
scheduler
-        // in a vary large value. This logicl is kept from the older 
implementation.
-        if (submit_many_scan_tasks_for_potential_performance_issue || 
_ignore_data_distribution) {
-            _max_thread_num = config::doris_scanner_thread_pool_thread_num / 1;
+    if (_max_concurrency = _state->num_scanner_threads(); _max_concurrency == 0) {
+        if (_serial_scan_operator) {
+            // If the scan operator is serial, we need to boost the concurrency to ensure a single scan operator
+            // could make full utilization of the resource.
+            _max_concurrency = _min_concurrency_of_scan_scheduler;
         } else {
-            const size_t factor = _is_file_scan_operator ? 1 : 4;
-            _max_thread_num = factor * 
(config::doris_scanner_thread_pool_thread_num /
-                                        num_parallel_instances);
+            _max_concurrency = _min_concurrency_of_scan_scheduler / num_parallel_instances;

Review Comment:
   如果是 shared scan，这里会是怎么样？



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to