HappenLee commented on code in PR #11468: URL: https://github.com/apache/doris/pull/11468#discussion_r938672830
########## be/src/vec/exec/volap_scan_node.cpp: ########## @@ -2019,4 +1342,526 @@ Status VOlapScanNode::get_hints(TabletSharedPtr table, const TPaloScanRange& sca return Status::OK(); } +template <bool IsNotIn> +bool VOlapScanNode::_should_push_down_in_predicate(VInPredicate* pred, VExprContext* expr_ctx) { + if (pred->is_not_in() != IsNotIn) { + return false; + } + InState* state = reinterpret_cast<InState*>( + expr_ctx->fn_context(pred->fn_context_index()) + ->get_function_state(FunctionContext::FRAGMENT_LOCAL)); + HybridSetBase* set = state->hybrid_set.get(); + + // if there are too many elements in InPredicate, exceed the limit, + // we will not push any condition of this column to storage engine. + // because too many conditions pushed down to storage engine may even + // slow down the query process. + // ATTN: This is just an experience value. You may need to try + // different thresholds to improve performance. + if (set->size() > _max_pushdown_conditions_per_column) { + VLOG_NOTICE << "Predicate value num " << set->size() << " exceed limit " + << _max_pushdown_conditions_per_column; + return false; + } + return true; +} + +bool VOlapScanNode::_should_push_down_function( + VectorizedFnCall* fn_call, const std::function<bool(const std::string&)>& fn_checker) { + return fn_checker(fn_call->fn().name.function_name); +} + +bool VOlapScanNode::_should_push_down_function_filter(VectorizedFnCall* fn_call, + VExprContext* expr_ctx, + std::string* constant_str, + doris_udf::FunctionContext** fn_ctx) { + // Now only `like` function filters is supported to push down + if (fn_call->fn().name.function_name != "like") { + return false; + } + + const auto& children = fn_call->children(); + doris_udf::FunctionContext* func_cxt = expr_ctx->fn_context(fn_call->fn_context_index()); + DCHECK(func_cxt != nullptr); + DCHECK(children.size() == 2); + for (size_t i = 0; i < children.size(); i++) { + if (VExpr::expr_without_cast(children[i])->node_type() != TExprNodeType::SLOT_REF) 
{ + // not a slot ref(column) + continue; + } + if (!children[1 - i]->is_constant()) { + // only handle constant value + return false; + } else { + DCHECK(children[1 - i]->type().is_string_type()); + if (const ColumnConst* const_column = check_and_get_column<ColumnConst>( + children[1 - i]->get_const_col(expr_ctx)->column_ptr)) { + *constant_str = std::string(const_column->get_data_at(0).data, + const_column->get_data_at(0).size); + } else { + return false; + } + } + } + *fn_ctx = func_cxt; + return true; +} + +bool VOlapScanNode::_should_push_down_binary_predicate( + VectorizedFnCall* fn_call, VExprContext* expr_ctx, StringRef* constant_val, + int* slot_ref_child, const std::function<bool(const std::string&)>& fn_checker) { + if (!fn_checker(fn_call->fn().name.function_name)) { + return false; + } + + auto children = fn_call->children(); Review Comment: Recheck every place that calls `fn_call->children()` and bind the result with `const auto&` instead of copying it with `auto`. ########## be/src/vec/exec/volap_scan_node.cpp: ########## @@ -2019,4 +1342,526 @@ Status VOlapScanNode::get_hints(TabletSharedPtr table, const TPaloScanRange& sca return Status::OK(); } +template <bool IsNotIn> +bool VOlapScanNode::_should_push_down_in_predicate(VInPredicate* pred, VExprContext* expr_ctx) { + if (pred->is_not_in() != IsNotIn) { + return false; + } + InState* state = reinterpret_cast<InState*>( + expr_ctx->fn_context(pred->fn_context_index()) + ->get_function_state(FunctionContext::FRAGMENT_LOCAL)); + HybridSetBase* set = state->hybrid_set.get(); + + // if there are too many elements in InPredicate, exceed the limit, + // we will not push any condition of this column to storage engine. + // because too many conditions pushed down to storage engine may even + // slow down the query process. + // ATTN: This is just an experience value. You may need to try + // different thresholds to improve performance. 
+ if (set->size() > _max_pushdown_conditions_per_column) { + VLOG_NOTICE << "Predicate value num " << set->size() << " exceed limit " + << _max_pushdown_conditions_per_column; + return false; + } + return true; +} + +bool VOlapScanNode::_should_push_down_function( + VectorizedFnCall* fn_call, const std::function<bool(const std::string&)>& fn_checker) { + return fn_checker(fn_call->fn().name.function_name); +} + +bool VOlapScanNode::_should_push_down_function_filter(VectorizedFnCall* fn_call, + VExprContext* expr_ctx, + std::string* constant_str, + doris_udf::FunctionContext** fn_ctx) { + // Now only `like` function filters is supported to push down + if (fn_call->fn().name.function_name != "like") { + return false; + } + + const auto& children = fn_call->children(); + doris_udf::FunctionContext* func_cxt = expr_ctx->fn_context(fn_call->fn_context_index()); + DCHECK(func_cxt != nullptr); + DCHECK(children.size() == 2); + for (size_t i = 0; i < children.size(); i++) { + if (VExpr::expr_without_cast(children[i])->node_type() != TExprNodeType::SLOT_REF) { + // not a slot ref(column) + continue; + } + if (!children[1 - i]->is_constant()) { + // only handle constant value + return false; + } else { + DCHECK(children[1 - i]->type().is_string_type()); + if (const ColumnConst* const_column = check_and_get_column<ColumnConst>( + children[1 - i]->get_const_col(expr_ctx)->column_ptr)) { + *constant_str = std::string(const_column->get_data_at(0).data, + const_column->get_data_at(0).size); + } else { + return false; + } + } + } + *fn_ctx = func_cxt; + return true; +} + +bool VOlapScanNode::_should_push_down_binary_predicate( + VectorizedFnCall* fn_call, VExprContext* expr_ctx, StringRef* constant_val, + int* slot_ref_child, const std::function<bool(const std::string&)>& fn_checker) { + if (!fn_checker(fn_call->fn().name.function_name)) { + return false; + } + + auto children = fn_call->children(); + DCHECK(children.size() == 2); + for (size_t i = 0; i < children.size(); i++) { + 
if (VExpr::expr_without_cast(children[i])->node_type() != TExprNodeType::SLOT_REF) { + // not a slot ref(column) + continue; + } + if (!children[1 - i]->is_constant()) { + // only handle constant value + return false; + } else { + if (const ColumnConst* const_column = check_and_get_column<ColumnConst>( + children[1 - i]->get_const_col(expr_ctx)->column_ptr)) { + *slot_ref_child = i; + *constant_val = const_column->get_data_at(0); + } else { + return false; + } + } + } + return true; +} + +bool VOlapScanNode::_is_predicate_acting_on_slot( + VExpr* expr, + const std::function<bool(const std::vector<VExpr*>&, const VSlotRef**, VExpr**)>& checker, + SlotDescriptor** slot_desc, ColumnValueRangeType** range) { + const VSlotRef* slot_ref = nullptr; + VExpr* child_contains_slot = nullptr; + if (!checker(expr->children(), &slot_ref, &child_contains_slot)) { + // not a slot ref(column) + return false; + } + + auto entry = _id_to_slot_column_value_range.find(slot_ref->slot_id()); + if (_id_to_slot_column_value_range.end() == entry) { + return false; + } + *slot_desc = entry->second.first; + DCHECK(child_contains_slot != nullptr); + if (child_contains_slot->type().type != (*slot_desc)->type().type) { + if (!ignore_cast(*slot_desc, child_contains_slot)) { + // the type of predicate not match the slot's type + return false; + } + } + *range = &(entry->second.second); + return true; +} + +template <PrimitiveType T> +Status VOlapScanNode::_normalize_in_and_eq_predicate(VExpr* expr, VExprContext* expr_ctx, + SlotDescriptor* slot, + ColumnValueRange<T>& range, bool* push_down) { + auto temp_range = ColumnValueRange<T>::create_empty_column_value_range(slot->type().precision, + slot->type().scale); + bool effect = false; + // 1. 
Normalize in conjuncts like 'where col in (v1, v2, v3)' + if (TExprNodeType::IN_PRED == expr->node_type()) { + VInPredicate* pred = static_cast<VInPredicate*>(expr); + if (!_should_push_down_in_predicate<false>(pred, expr_ctx)) { + return Status::OK(); + } + + // begin to push InPredicate value into ColumnValueRange + InState* state = reinterpret_cast<InState*>( + expr_ctx->fn_context(pred->fn_context_index()) + ->get_function_state(FunctionContext::FRAGMENT_LOCAL)); + HybridSetBase::IteratorBase* iter = state->hybrid_set->begin(); + while (iter->has_next()) { + // column in (nullptr) is always false so continue to + // dispose next item + if (nullptr == iter->get_value()) { + iter->next(); + continue; + } + auto value = const_cast<void*>(iter->get_value()); + RETURN_IF_ERROR(change_value_range<true>(temp_range, value, + ColumnValueRange<T>::add_fixed_value_range)); + iter->next(); + } + + range.intersection(temp_range); + effect = true; + } else if (TExprNodeType::BINARY_PRED == expr->node_type()) { + DCHECK(expr->children().size() == 2); + auto eq_checker = [&](const std::string& fn_name) { return fn_name == "eq"; }; + + StringRef value; + int slot_ref_child = -1; + if (_should_push_down_binary_predicate(reinterpret_cast<VectorizedFnCall*>(expr), expr_ctx, + &value, &slot_ref_child, eq_checker)) { + DCHECK(slot_ref_child >= 0); + // where A = nullptr should return empty result set + if (value.data != nullptr) { + if constexpr (T == TYPE_CHAR || T == TYPE_VARCHAR || T == TYPE_STRING || + T == TYPE_HLL) { + auto val = StringValue(value.data, value.size); + RETURN_IF_ERROR( + change_value_range<true>(temp_range, reinterpret_cast<void*>(&val), + ColumnValueRange<T>::add_fixed_value_range)); + } else { + RETURN_IF_ERROR(change_value_range<true>( + temp_range, reinterpret_cast<void*>(const_cast<char*>(value.data)), + ColumnValueRange<T>::add_fixed_value_range)); + } + range.intersection(temp_range); + effect = true; + } + } + } + + // exceed limit, no conditions will 
be pushed down to storage engine. + if (range.get_fixed_value_size() > _max_pushdown_conditions_per_column) { + range.set_whole_value_range(); + } else { + *push_down = effect; + } + return Status::OK(); +} + +template <PrimitiveType T> +Status VOlapScanNode::_normalize_not_in_and_not_eq_predicate(VExpr* expr, VExprContext* expr_ctx, + SlotDescriptor* slot, + ColumnValueRange<T>& range, + bool* push_down) { + bool is_fixed_range = range.is_fixed_value_range(); + auto not_in_range = ColumnValueRange<T>::create_empty_column_value_range(range.column_name()); + bool effect = false; + // 1. Normalize in conjuncts like 'where col in (v1, v2, v3)' + if (TExprNodeType::IN_PRED == expr->node_type()) { + VInPredicate* pred = static_cast<VInPredicate*>(expr); + if (!_should_push_down_in_predicate<true>(pred, expr_ctx)) { + return Status::OK(); + } + + // begin to push InPredicate value into ColumnValueRange + InState* state = reinterpret_cast<InState*>( + expr_ctx->fn_context(pred->fn_context_index()) + ->get_function_state(FunctionContext::FRAGMENT_LOCAL)); + HybridSetBase::IteratorBase* iter = state->hybrid_set->begin(); + while (iter->has_next()) { + // column not in (nullptr) is always true + if (nullptr == iter->get_value()) { + continue; + } + auto value = const_cast<void*>(iter->get_value()); + if (is_fixed_range) { + RETURN_IF_ERROR(change_value_range<true>( + range, value, ColumnValueRange<T>::remove_fixed_value_range)); + } else { + RETURN_IF_ERROR(change_value_range<true>( + not_in_range, value, ColumnValueRange<T>::add_fixed_value_range)); + } + iter->next(); + } + effect = true; + } else if (TExprNodeType::BINARY_PRED == expr->node_type()) { + DCHECK(expr->children().size() == 2); + + auto ne_checker = [&](const std::string& fn_name) { return fn_name == "ne"; }; Review Comment: The checker lambda should not use `[&]`; it captures nothing, so use `[]`. ########## be/src/vec/exec/volap_scan_node.cpp: ########## @@ -2019,4 +1342,526 @@ Status VOlapScanNode::get_hints(TabletSharedPtr 
table, const TPaloScanRange& sca return Status::OK(); } +template <bool IsNotIn> +bool VOlapScanNode::_should_push_down_in_predicate(VInPredicate* pred, VExprContext* expr_ctx) { + if (pred->is_not_in() != IsNotIn) { + return false; + } + InState* state = reinterpret_cast<InState*>( + expr_ctx->fn_context(pred->fn_context_index()) + ->get_function_state(FunctionContext::FRAGMENT_LOCAL)); + HybridSetBase* set = state->hybrid_set.get(); + + // if there are too many elements in InPredicate, exceed the limit, + // we will not push any condition of this column to storage engine. + // because too many conditions pushed down to storage engine may even + // slow down the query process. + // ATTN: This is just an experience value. You may need to try + // different thresholds to improve performance. + if (set->size() > _max_pushdown_conditions_per_column) { + VLOG_NOTICE << "Predicate value num " << set->size() << " exceed limit " + << _max_pushdown_conditions_per_column; + return false; + } + return true; +} + +bool VOlapScanNode::_should_push_down_function( + VectorizedFnCall* fn_call, const std::function<bool(const std::string&)>& fn_checker) { + return fn_checker(fn_call->fn().name.function_name); +} + +bool VOlapScanNode::_should_push_down_function_filter(VectorizedFnCall* fn_call, + VExprContext* expr_ctx, + std::string* constant_str, + doris_udf::FunctionContext** fn_ctx) { + // Now only `like` function filters is supported to push down + if (fn_call->fn().name.function_name != "like") { + return false; + } + + const auto& children = fn_call->children(); + doris_udf::FunctionContext* func_cxt = expr_ctx->fn_context(fn_call->fn_context_index()); + DCHECK(func_cxt != nullptr); + DCHECK(children.size() == 2); + for (size_t i = 0; i < children.size(); i++) { + if (VExpr::expr_without_cast(children[i])->node_type() != TExprNodeType::SLOT_REF) { + // not a slot ref(column) + continue; + } + if (!children[1 - i]->is_constant()) { + // only handle constant value + return 
false; + } else { + DCHECK(children[1 - i]->type().is_string_type()); + if (const ColumnConst* const_column = check_and_get_column<ColumnConst>( + children[1 - i]->get_const_col(expr_ctx)->column_ptr)) { + *constant_str = std::string(const_column->get_data_at(0).data, Review Comment: Use `const_column->get_data_at(0).to_string()` here instead of building the `std::string` from `.data` and `.size` by hand. ########## be/src/vec/exec/volap_scan_node.cpp: ########## @@ -966,152 +794,83 @@ static bool ignore_cast(SlotDescriptor* slot, Expr* expr) { return false; } -bool VOlapScanNode::should_push_down_in_predicate(doris::SlotDescriptor* slot, - doris::InPredicate* pred) { - if (Expr::type_without_cast(pred->get_child(0)) != TExprNodeType::SLOT_REF) { - // not a slot ref(column) - return false; - } - - std::vector<SlotId> slot_ids; - if (pred->get_child(0)->get_slot_ids(&slot_ids) != 1) { - // not a single column predicate - return false; - } - - if (slot_ids[0] != slot->id()) { - // predicate not related to current column - return false; - } - - if (pred->get_child(0)->type().type != slot->type().type) { - if (!ignore_cast(slot, pred->get_child(0))) { - // the type of predicate not match the slot's type - return false; - } - } - - VLOG_CRITICAL << slot->col_name() << " fixed_values add num: " << pred->hybrid_set()->size(); - - // if there are too many elements in InPredicate, exceed the limit, - // we will not push any condition of this column to storage engine. - // because too many conditions pushed down to storage engine may even - // slow down the query process. - // ATTN: This is just an experience value. You may need to try - // different thresholds to improve performance. 
- if (pred->hybrid_set()->size() > _max_pushdown_conditions_per_column) { - VLOG_NOTICE << "Predicate value num " << pred->hybrid_set()->size() << " exceed limit " - << _max_pushdown_conditions_per_column; - return false; - } - - return true; -} - -std::pair<bool, void*> VOlapScanNode::should_push_down_eq_predicate(doris::SlotDescriptor* slot, - doris::Expr* pred, int conj_idx, - int child_idx) { - auto result_pair = std::make_pair<bool, void*>(false, nullptr); - - // Do not get slot_ref of column, should not push_down to Storage Engine - if (Expr::type_without_cast(pred->get_child(child_idx)) != TExprNodeType::SLOT_REF) { - return result_pair; - } - - std::vector<SlotId> slot_ids; - if (pred->get_child(child_idx)->get_slot_ids(&slot_ids) != 1) { - // not a single column predicate - return result_pair; - } - - if (slot_ids[0] != slot->id()) { - // predicate not related to current column - return result_pair; - } - - if (pred->get_child(child_idx)->type().type != slot->type().type) { - if (!ignore_cast(slot, pred->get_child(child_idx))) { - // the type of predicate not match the slot's type - return result_pair; +template <bool IsFixed, PrimitiveType PrimitiveType, typename ChangeFixedValueRangeFunc> +Status VOlapScanNode::change_value_range(ColumnValueRange<PrimitiveType>& temp_range, void* value, + const ChangeFixedValueRangeFunc& func, std::string fn_name, Review Comment: const std::string& fn_name -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org