github-actions[bot] commented on code in PR #33173: URL: https://github.com/apache/doris/pull/33173#discussion_r1560617006
##########
be/src/pipeline/exec/distinct_streaming_aggregation_operator.cpp:
##########
@@ -480,23 +500,34 @@ Status DistinctStreamingAggOperatorX::pull(RuntimeState* state, vectorized::Bloc
     if (!local_state._aggregated_block->empty()) {
         block->swap(*local_state._aggregated_block);
         local_state._aggregated_block->clear_column_data(block->columns());
+        // The cache block may have additional data due to exceeding the batch size.
+        if (!local_state._cache_block.empty()) {
+            local_state._swap_cache_block(local_state._aggregated_block.get());
+        }
     }
     local_state._make_nullable_output_key(block);
-    if (_is_streaming_preagg == false) {
+    if (!_is_streaming_preagg) {
         // dispose the having clause, should not be execute in prestreaming agg
         RETURN_IF_ERROR(
                 vectorized::VExprContext::filter_block(_conjuncts, block, block->columns()));
     }
     local_state.add_num_rows_returned(block->rows());
-    *eos = local_state._child_eos || (_limit != -1 && local_state._output_distinct_rows >= _limit);
+    COUNTER_UPDATE(local_state.blocks_returned_counter(), 1);
+    // If the limit is not reached, it is important to ensure that _aggregated_block is empty
+    // because it may still contain data.
+    // However, if the limit is reached, there is no need to output data even if some exists.
+    *eos = (local_state._child_eos && local_state._aggregated_block->empty()) ||
+           (local_state._reach_limit);
     return Status::OK();
 }
 bool DistinctStreamingAggOperatorX::need_more_input_data(RuntimeState* state) const {
     auto& local_state = get_local_state(state);
-    return local_state._aggregated_block->empty() && !local_state._child_eos &&
-           (_limit == -1 || local_state._output_distinct_rows < _limit);
+    const bool need_batch = local_state._stop_emplace_flag

Review Comment:
warning: method 'need_more_input_data' can be made static [readability-convert-member-functions-to-static]

```suggestion
bool DistinctStreamingAggOperatorX::need_more_input_data(RuntimeState* state) {
```

be/src/pipeline/exec/distinct_streaming_aggregation_operator.h:105:
```diff
-     bool need_more_input_data(RuntimeState* state) const override;
+     static bool need_more_input_data(RuntimeState* state) override;
```

--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org