yiguolei commented on code in PR #30746: URL: https://github.com/apache/doris/pull/30746#discussion_r1477191521
########## be/src/vec/exec/scan/scanner_context.cpp: ########## @@ -210,148 +136,238 @@ Status ScannerContext::init() { thread_token == nullptr ? "False" : "True"); } + // submit `_max_thread_num` running scanners to `ScannerScheduler` + // When a running scanners is finished, it will submit one of the remaining scanners. + for (int i = 0; i < _max_thread_num; ++i) { + std::weak_ptr<ScannerDelegate> next_scanner; + if (_scanners.try_dequeue(next_scanner)) { + vectorized::BlockUPtr block = get_free_block(_batch_size); + submit_running_scanner( + std::make_shared<RunningScanner>(next_scanner, std::move(block))); + _num_running_scanners++; + } + } + return Status::OK(); } std::string ScannerContext::parent_name() { return _parent ? _parent->get_name() : _local_state->get_name(); } -vectorized::BlockUPtr ScannerContext::get_free_block() { +vectorized::BlockUPtr ScannerContext::get_free_block(int batch_size) { vectorized::BlockUPtr block; if (_free_blocks.try_dequeue(block)) { + std::lock_guard<std::mutex> fl(_free_blocks_lock); DCHECK(block->mem_reuse()); - _free_blocks_memory_usage->add(-block->allocated_bytes()); - _serving_blocks_num++; + _free_blocks_memory_usage -= block->allocated_bytes(); + _free_blocks_memory_usage_mark->set(_free_blocks_memory_usage); return block; } - block = vectorized::Block::create_unique(_output_tuple_desc->slots(), _batch_size, - true /*ignore invalid slots*/); - - COUNTER_UPDATE(_newly_create_free_blocks_num, 1); - - _serving_blocks_num++; - return block; + return vectorized::Block::create_unique(_output_tuple_desc->slots(), batch_size, + true /*ignore invalid slots*/); } -void ScannerContext::return_free_block(std::unique_ptr<vectorized::Block> block) { - _serving_blocks_num--; - if (block->mem_reuse()) { - // Only put blocks with schema to free blocks, because colocate blocks - // need schema. 
- _estimated_block_bytes = std::max(block->allocated_bytes(), (size_t)16); +void ScannerContext::return_free_block(vectorized::BlockUPtr block) { + std::lock_guard<std::mutex> fl(_free_blocks_lock); + if (block->mem_reuse() && _free_blocks_memory_usage < _max_bytes_in_queue) { block->clear_column_data(); - _free_blocks_memory_usage->add(block->allocated_bytes()); + _free_blocks_memory_usage += block->allocated_bytes(); + _free_blocks_memory_usage_mark->set(_free_blocks_memory_usage); _free_blocks.enqueue(std::move(block)); } } -void ScannerContext::append_blocks_to_queue(std::vector<vectorized::BlockUPtr>& blocks) { - std::lock_guard l(_transfer_lock); - auto old_bytes_in_queue = _cur_bytes_in_queue; - for (auto& b : blocks) { - auto st = validate_block_schema(b.get()); - if (!st.ok()) { - set_status_on_error(st, false); +bool ScannerContext::empty_in_queue(int id) { + std::lock_guard<std::mutex> l(_transfer_lock); + return _blocks_queue.empty(); +} + +void ScannerContext::submit_running_scanner(std::shared_ptr<RunningScanner> running_scanner) { + _num_scheduled_scanners++; + _scanner_scheduler->submit(shared_from_this(), running_scanner); +} + +void ScannerContext::append_block_to_queue(std::shared_ptr<RunningScanner> running_scanner) { + Status st = validate_block_schema(running_scanner->current_block.get()); + if (!st.ok()) { + running_scanner->status = st; Review Comment: Add a `set_status` method to `RunningScanner` and use it here instead of assigning the field directly. The `status` member of `RunningScanner` should be private. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org