This is an automated email from the ASF dual-hosted git repository. panxiaolei pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new ea264ce9de [Opt](join) short circuit probe for join node (#20585) ea264ce9de is described below commit ea264ce9de3329d661a1e89a8d33d4496118fb64 Author: HappenLee <happen...@hotmail.com> AuthorDate: Mon Jun 12 16:01:09 2023 +0800 [Opt](join) short circuit probe for join node (#20585) Support the _short_circuit_for_probe for join node --- be/src/pipeline/exec/operator.h | 3 ++- be/src/vec/exec/join/vhash_join_node.cpp | 16 ++++++++-------- be/src/vec/exec/join/vhash_join_node.h | 11 +++++++++++ be/src/vec/exec/join/vjoin_node_base.h | 7 +++++++ 4 files changed, 28 insertions(+), 9 deletions(-) diff --git a/be/src/pipeline/exec/operator.h b/be/src/pipeline/exec/operator.h index 2ef903b0e4..0a31435b8f 100644 --- a/be/src/pipeline/exec/operator.h +++ b/be/src/pipeline/exec/operator.h @@ -419,7 +419,8 @@ public: return Status::OK(); } node->prepare_for_next(); - node->push(state, _child_block.get(), _child_source_state == SourceState::FINISHED); + RETURN_IF_ERROR(node->push(state, _child_block.get(), + _child_source_state == SourceState::FINISHED)); } if (!node->need_more_input_data()) { diff --git a/be/src/vec/exec/join/vhash_join_node.cpp b/be/src/vec/exec/join/vhash_join_node.cpp index 1628af157e..2dfb93a98d 100644 --- a/be/src/vec/exec/join/vhash_join_node.cpp +++ b/be/src/vec/exec/join/vhash_join_node.cpp @@ -521,7 +521,7 @@ Status HashJoinNode::close(RuntimeState* state) { bool HashJoinNode::need_more_input_data() const { return (_probe_block.rows() == 0 || _probe_index == _probe_block.rows()) && !_probe_eos && - !_short_circuit_for_null_in_probe_side; + !_short_circuit_for_probe; } void HashJoinNode::prepare_for_next() { @@ -531,9 +531,8 @@ void HashJoinNode::prepare_for_next() { Status HashJoinNode::pull(doris::RuntimeState* state, vectorized::Block* output_block, bool* eos) { SCOPED_TIMER(_probe_timer); - if (_short_circuit_for_null_in_probe_side) { - // If we use a short-circuit strategy for null value in build side (e.g. if join operator is - // NULL_AWARE_LEFT_ANTI_JOIN), we should return empty block directly. + if (_short_circuit_for_probe) { + // If we use a short-circuit strategy, should return empty block directly. *eos = true; return Status::OK(); } @@ -664,9 +663,8 @@ Status HashJoinNode::push(RuntimeState* /*state*/, vectorized::Block* input_bloc Status HashJoinNode::get_next(RuntimeState* state, Block* output_block, bool* eos) { SCOPED_TIMER(_runtime_profile->total_time_counter()); - if (_short_circuit_for_null_in_probe_side) { - // If we use a short-circuit strategy for null value in build side (e.g. if join operator is - // NULL_AWARE_LEFT_ANTI_JOIN), we should return empty block directly. + if (_short_circuit_for_probe) { + // If we use a short-circuit strategy, should return empty block directly. *eos = true; return Status::OK(); } @@ -952,6 +950,8 @@ Status HashJoinNode::sink(doris::RuntimeState* state, vectorized::Block* in_bloc if (!_build_blocks->empty() && _join_op == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN) { _probe_ignore_null = true; } + _init_short_circuit_for_probe(); + return Status::OK(); } @@ -960,7 +960,7 @@ void HashJoinNode::debug_string(int indentation_level, std::stringstream* out) c *out << "HashJoin(need_more_input_data=" << (need_more_input_data() ? "true" : "false") << " _probe_block.rows()=" << _probe_block.rows() << " _probe_index=" << _probe_index << " _probe_eos=" << _probe_eos - << " _short_circuit_for_null_in_probe_side=" << _short_circuit_for_null_in_probe_side; + << " _short_circuit_for_probe_side=" << _short_circuit_for_probe; *out << ")\n children=("; ExecNode::debug_string(indentation_level, out); *out << ")"; diff --git a/be/src/vec/exec/join/vhash_join_node.h b/be/src/vec/exec/join/vhash_join_node.h index 7430bd6ab1..286a0783a6 100644 --- a/be/src/vec/exec/join/vhash_join_node.h +++ b/be/src/vec/exec/join/vhash_join_node.h @@ -253,6 +253,17 @@ public: bool should_build_hash_table() const { return _should_build_hash_table; } private: + void _init_short_circuit_for_probe() override { + _short_circuit_for_probe = + (_short_circuit_for_null_in_probe_side && + _join_op == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN) || + (_build_blocks->empty() && _join_op == TJoinOp::INNER_JOIN && !_is_mark_join) || + (_build_blocks->empty() && _join_op == TJoinOp::LEFT_SEMI_JOIN && !_is_mark_join) || + (_build_blocks->empty() && _join_op == TJoinOp::RIGHT_OUTER_JOIN) || + (_build_blocks->empty() && _join_op == TJoinOp::RIGHT_SEMI_JOIN) || + (_build_blocks->empty() && _join_op == TJoinOp::RIGHT_ANTI_JOIN); + } + // probe expr VExprContextSPtrs _probe_expr_ctxs; // build expr diff --git a/be/src/vec/exec/join/vjoin_node_base.h b/be/src/vec/exec/join/vjoin_node_base.h index 757670b31e..ce644c159e 100644 --- a/be/src/vec/exec/join/vjoin_node_base.h +++ b/be/src/vec/exec/join/vjoin_node_base.h @@ -93,6 +93,8 @@ protected: // Materialize build relation. For HashJoin, it will build a hash table while a list of build blocks for NLJoin. virtual Status _materialize_build_side(RuntimeState* state) = 0; + virtual void _init_short_circuit_for_probe() { _short_circuit_for_probe = false; } + TJoinOp::type _join_op; JoinOpVariants _join_op_variants; @@ -113,6 +115,11 @@ protected: const bool _short_circuit_for_null_in_build_side = false; bool _short_circuit_for_null_in_probe_side = false; + // For some join case, we can apply a short circuit strategy + // 1. _short_circuit_for_null_in_probe_side = true + // 2. build side rows is empty, Join op is: inner join/right outer join/left semi/right semi/right anti + bool _short_circuit_for_probe = false; + std::unique_ptr<RowDescriptor> _output_row_desc; std::unique_ptr<RowDescriptor> _intermediate_row_desc; // output expr --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org