HappenLee commented on code in PR #11468:
URL: https://github.com/apache/doris/pull/11468#discussion_r939826917


##########
be/src/vec/exec/volap_scan_node.cpp:
##########
@@ -966,152 +794,83 @@ static bool ignore_cast(SlotDescriptor* slot, Expr* 
expr) {
     return false;
 }
 
-bool VOlapScanNode::should_push_down_in_predicate(doris::SlotDescriptor* slot,
-                                                  doris::InPredicate* pred) {
-    if (Expr::type_without_cast(pred->get_child(0)) != 
TExprNodeType::SLOT_REF) {
-        // not a slot ref(column)
-        return false;
-    }
-
-    std::vector<SlotId> slot_ids;
-    if (pred->get_child(0)->get_slot_ids(&slot_ids) != 1) {
-        // not a single column predicate
-        return false;
-    }
-
-    if (slot_ids[0] != slot->id()) {
-        // predicate not related to current column
-        return false;
-    }
-
-    if (pred->get_child(0)->type().type != slot->type().type) {
-        if (!ignore_cast(slot, pred->get_child(0))) {
-            // the type of predicate not match the slot's type
-            return false;
-        }
-    }
-
-    VLOG_CRITICAL << slot->col_name() << " fixed_values add num: " << 
pred->hybrid_set()->size();
-
-    // if there are too many elements in InPredicate, exceed the limit,
-    // we will not push any condition of this column to storage engine.
-    // because too many conditions pushed down to storage engine may even
-    // slow down the query process.
-    // ATTN: This is just an experience value. You may need to try
-    // different thresholds to improve performance.
-    if (pred->hybrid_set()->size() > _max_pushdown_conditions_per_column) {
-        VLOG_NOTICE << "Predicate value num " << pred->hybrid_set()->size() << 
" exceed limit "
-                    << _max_pushdown_conditions_per_column;
-        return false;
-    }
-
-    return true;
-}
-
-std::pair<bool, void*> 
VOlapScanNode::should_push_down_eq_predicate(doris::SlotDescriptor* slot,
-                                                                    
doris::Expr* pred, int conj_idx,
-                                                                    int 
child_idx) {
-    auto result_pair = std::make_pair<bool, void*>(false, nullptr);
-
-    // Do not get slot_ref of column, should not push_down to Storage Engine
-    if (Expr::type_without_cast(pred->get_child(child_idx)) != 
TExprNodeType::SLOT_REF) {
-        return result_pair;
-    }
-
-    std::vector<SlotId> slot_ids;
-    if (pred->get_child(child_idx)->get_slot_ids(&slot_ids) != 1) {
-        // not a single column predicate
-        return result_pair;
-    }
-
-    if (slot_ids[0] != slot->id()) {
-        // predicate not related to current column
-        return result_pair;
-    }
-
-    if (pred->get_child(child_idx)->type().type != slot->type().type) {
-        if (!ignore_cast(slot, pred->get_child(child_idx))) {
-            // the type of predicate not match the slot's type
-            return result_pair;
+template <bool IsFixed, PrimitiveType PrimitiveType, typename 
ChangeFixedValueRangeFunc>
+Status VOlapScanNode::change_value_range(ColumnValueRange<PrimitiveType>& 
temp_range, void* value,
+                                         const ChangeFixedValueRangeFunc& func,
+                                         std::string& fn_name, int 
slot_ref_child) {

Review Comment:
   If `fn_name` is not changed inside this function, declare it as `const std::string&`.



##########
be/src/vec/exec/volap_scan_node.cpp:
##########
@@ -2019,4 +1342,534 @@ Status VOlapScanNode::get_hints(TabletSharedPtr table, 
const TPaloScanRange& sca
     return Status::OK();
 }
 
+template <bool IsNotIn>
+bool VOlapScanNode::_should_push_down_in_predicate(VInPredicate* pred, 
VExprContext* expr_ctx) {
+    if (pred->is_not_in() != IsNotIn) {
+        return false;
+    }
+    InState* state = reinterpret_cast<InState*>(
+            expr_ctx->fn_context(pred->fn_context_index())
+                    ->get_function_state(FunctionContext::FRAGMENT_LOCAL));
+    HybridSetBase* set = state->hybrid_set.get();
+
+    // if there are too many elements in InPredicate, exceed the limit,
+    // we will not push any condition of this column to storage engine.
+    // because too many conditions pushed down to storage engine may even
+    // slow down the query process.
+    // ATTN: This is just an experience value. You may need to try
+    // different thresholds to improve performance.
+    if (set->size() > _max_pushdown_conditions_per_column) {
+        VLOG_NOTICE << "Predicate value num " << set->size() << " exceed limit 
"
+                    << _max_pushdown_conditions_per_column;
+        return false;
+    }
+    return true;
+}
+
+bool VOlapScanNode::_should_push_down_function_filter(VectorizedFnCall* 
fn_call,
+                                                      VExprContext* expr_ctx,
+                                                      std::string* 
constant_str,
+                                                      
doris_udf::FunctionContext** fn_ctx) {
+    // Now only `like` function filters is supported to push down
+    if (fn_call->fn().name.function_name != "like") {
+        return false;
+    }
+
+    const auto& children = fn_call->children();
+    doris_udf::FunctionContext* func_cxt = 
expr_ctx->fn_context(fn_call->fn_context_index());
+    DCHECK(func_cxt != nullptr);
+    DCHECK(children.size() == 2);
+    for (size_t i = 0; i < children.size(); i++) {
+        if (VExpr::expr_without_cast(children[i])->node_type() != 
TExprNodeType::SLOT_REF) {
+            // not a slot ref(column)
+            continue;
+        }
+        if (!children[1 - i]->is_constant()) {
+            // only handle constant value
+            return false;
+        } else {
+            DCHECK(children[1 - i]->type().is_string_type());
+            if (const ColumnConst* const_column = 
check_and_get_column<ColumnConst>(
+                        children[1 - i]->get_const_col(expr_ctx)->column_ptr)) 
{
+                *constant_str = const_column->get_data_at(0).to_string();
+            } else {
+                return false;
+            }
+        }
+    }
+    *fn_ctx = func_cxt;
+    return true;
+}
+
+bool VOlapScanNode::_should_push_down_binary_predicate(
+        VectorizedFnCall* fn_call, VExprContext* expr_ctx, StringRef* 
constant_val,
+        int* slot_ref_child, const std::function<bool(const std::string&)>& 
fn_checker) {
+    if (!fn_checker(fn_call->fn().name.function_name)) {
+        return false;
+    }
+
+    const auto& children = fn_call->children();
+    DCHECK(children.size() == 2);
+    for (size_t i = 0; i < children.size(); i++) {
+        if (VExpr::expr_without_cast(children[i])->node_type() != 
TExprNodeType::SLOT_REF) {
+            // not a slot ref(column)
+            continue;
+        }
+        if (!children[1 - i]->is_constant()) {
+            // only handle constant value
+            return false;
+        } else {
+            if (const ColumnConst* const_column = 
check_and_get_column<ColumnConst>(
+                        children[1 - i]->get_const_col(expr_ctx)->column_ptr)) 
{
+                *slot_ref_child = i;
+                *constant_val = const_column->get_data_at(0);
+            } else {
+                return false;
+            }
+        }
+    }
+    return true;
+}
+
+bool VOlapScanNode::_is_predicate_acting_on_slot(
+        VExpr* expr,
+        const std::function<bool(const std::vector<VExpr*>&, const VSlotRef**, 
VExpr**)>& checker,
+        SlotDescriptor** slot_desc, ColumnValueRangeType** range) {
+    const VSlotRef* slot_ref = nullptr;
+    VExpr* child_contains_slot = nullptr;
+    if (!checker(expr->children(), &slot_ref, &child_contains_slot)) {
+        // not a slot ref(column)
+        return false;
+    }
+
+    auto entry = _id_to_slot_column_value_range.find(slot_ref->slot_id());
+    if (_id_to_slot_column_value_range.end() == entry) {
+        return false;
+    }
+    *slot_desc = entry->second.first;
+    DCHECK(child_contains_slot != nullptr);
+    if (child_contains_slot->type().type != (*slot_desc)->type().type) {
+        if (!ignore_cast(*slot_desc, child_contains_slot)) {
+            // the type of predicate not match the slot's type
+            return false;
+        }
+    }
+    *range = &(entry->second.second);
+    return true;
+}
+
+template <PrimitiveType T>
+Status VOlapScanNode::_normalize_in_and_eq_predicate(VExpr* expr, 
VExprContext* expr_ctx,
+                                                     SlotDescriptor* slot,
+                                                     ColumnValueRange<T>& 
range, bool* push_down) {
+    auto temp_range = 
ColumnValueRange<T>::create_empty_column_value_range(slot->type().precision,
+                                                                           
slot->type().scale);
+    bool effect = false;
+    // 1. Normalize in conjuncts like 'where col in (v1, v2, v3)'
+    if (TExprNodeType::IN_PRED == expr->node_type()) {
+        VInPredicate* pred = static_cast<VInPredicate*>(expr);
+        if (!_should_push_down_in_predicate<false>(pred, expr_ctx)) {
+            return Status::OK();
+        }
+
+        // begin to push InPredicate value into ColumnValueRange
+        InState* state = reinterpret_cast<InState*>(
+                expr_ctx->fn_context(pred->fn_context_index())
+                        ->get_function_state(FunctionContext::FRAGMENT_LOCAL));
+        HybridSetBase::IteratorBase* iter = state->hybrid_set->begin();
+        auto fn_name = std::string("");
+        while (iter->has_next()) {
+            // column in (nullptr) is always false so continue to
+            // dispose next item
+            if (nullptr == iter->get_value()) {
+                iter->next();
+                continue;
+            }
+            auto value = const_cast<void*>(iter->get_value());
+            RETURN_IF_ERROR(change_value_range<true>(
+                    temp_range, value, 
ColumnValueRange<T>::add_fixed_value_range, fn_name));
+            iter->next();
+        }
+
+        range.intersection(temp_range);
+        effect = true;
+    } else if (TExprNodeType::BINARY_PRED == expr->node_type()) {
+        DCHECK(expr->children().size() == 2);
+        auto eq_checker = [](const std::string& fn_name) { return fn_name == 
"eq"; };
+
+        StringRef value;
+        int slot_ref_child = -1;
+        if 
(_should_push_down_binary_predicate(reinterpret_cast<VectorizedFnCall*>(expr), 
expr_ctx,
+                                               &value, &slot_ref_child, 
eq_checker)) {
+            DCHECK(slot_ref_child >= 0);
+            // where A = nullptr should return empty result set
+            auto fn_name = std::string("");
+            if (value.data != nullptr) {
+                if constexpr (T == TYPE_CHAR || T == TYPE_VARCHAR || T == 
TYPE_STRING ||
+                              T == TYPE_HLL) {
+                    auto val = StringValue(value.data, value.size);
+                    RETURN_IF_ERROR(change_value_range<true>(
+                            temp_range, reinterpret_cast<void*>(&val),
+                            ColumnValueRange<T>::add_fixed_value_range, 
fn_name));
+                } else {
+                    RETURN_IF_ERROR(change_value_range<true>(
+                            temp_range, 
reinterpret_cast<void*>(const_cast<char*>(value.data)),
+                            ColumnValueRange<T>::add_fixed_value_range, 
fn_name));
+                }
+                range.intersection(temp_range);
+                effect = true;
+            }
+        }
+    }
+
+    // exceed limit, no conditions will be pushed down to storage engine.
+    if (range.get_fixed_value_size() > _max_pushdown_conditions_per_column) {
+        range.set_whole_value_range();
+    } else {
+        *push_down = effect;
+    }
+    return Status::OK();
+}
+
+template <PrimitiveType T>
+Status VOlapScanNode::_normalize_not_in_and_not_eq_predicate(VExpr* expr, 
VExprContext* expr_ctx,
+                                                             SlotDescriptor* 
slot,
+                                                             
ColumnValueRange<T>& range,
+                                                             bool* push_down) {
+    bool is_fixed_range = range.is_fixed_value_range();
+    auto not_in_range = 
ColumnValueRange<T>::create_empty_column_value_range(range.column_name());
+    bool effect = false;
+    // 1. Normalize in conjuncts like 'where col in (v1, v2, v3)'
+    if (TExprNodeType::IN_PRED == expr->node_type()) {
+        VInPredicate* pred = static_cast<VInPredicate*>(expr);
+        if (!_should_push_down_in_predicate<true>(pred, expr_ctx)) {
+            return Status::OK();
+        }
+
+        // begin to push InPredicate value into ColumnValueRange
+        InState* state = reinterpret_cast<InState*>(
+                expr_ctx->fn_context(pred->fn_context_index())
+                        ->get_function_state(FunctionContext::FRAGMENT_LOCAL));
+        HybridSetBase::IteratorBase* iter = state->hybrid_set->begin();
+        auto fn_name = std::string("");
+        while (iter->has_next()) {
+            // column not in (nullptr) is always true
+            if (nullptr == iter->get_value()) {
+                continue;
+            }
+            auto value = const_cast<void*>(iter->get_value());
+            if (is_fixed_range) {
+                RETURN_IF_ERROR(change_value_range<true>(
+                        range, value, 
ColumnValueRange<T>::remove_fixed_value_range, fn_name));
+            } else {
+                RETURN_IF_ERROR(change_value_range<true>(
+                        not_in_range, value, 
ColumnValueRange<T>::add_fixed_value_range, fn_name));
+            }
+            iter->next();
+        }
+        effect = true;
+    } else if (TExprNodeType::BINARY_PRED == expr->node_type()) {
+        DCHECK(expr->children().size() == 2);
+
+        auto ne_checker = [](const std::string& fn_name) { return fn_name == 
"ne"; };
+        StringRef value;
+        int slot_ref_child = -1;
+        if 
(_should_push_down_binary_predicate(reinterpret_cast<VectorizedFnCall*>(expr), 
expr_ctx,
+                                               &value, &slot_ref_child, 
ne_checker)) {
+            DCHECK(slot_ref_child >= 0);
+            // where A = nullptr should return empty result set
+            if (value.data != nullptr) {
+                auto fn_name = std::string("");
+                if constexpr (T == TYPE_CHAR || T == TYPE_VARCHAR || T == 
TYPE_STRING ||
+                              T == TYPE_HLL) {
+                    auto val = StringValue(value.data, value.size);
+                    if (is_fixed_range) {
+                        RETURN_IF_ERROR(change_value_range<true>(
+                                range, reinterpret_cast<void*>(&val),
+                                ColumnValueRange<T>::remove_fixed_value_range, 
fn_name));
+                    } else {
+                        RETURN_IF_ERROR(change_value_range<true>(
+                                not_in_range, reinterpret_cast<void*>(&val),
+                                ColumnValueRange<T>::add_fixed_value_range, 
fn_name));
+                    }
+                } else {
+                    if (is_fixed_range) {
+                        RETURN_IF_ERROR(change_value_range<true>(
+                                range, 
reinterpret_cast<void*>(const_cast<char*>(value.data)),
+                                ColumnValueRange<T>::remove_fixed_value_range, 
fn_name));
+                    } else {
+                        RETURN_IF_ERROR(change_value_range<true>(
+                                not_in_range,
+                                
reinterpret_cast<void*>(const_cast<char*>(value.data)),
+                                ColumnValueRange<T>::add_fixed_value_range, 
fn_name));
+                    }
+                }
+                effect = true;
+            }
+        }
+    }
+
+    if (is_fixed_range ||
+        not_in_range.get_fixed_value_size() <= 
_max_pushdown_conditions_per_column) {
+        if (!is_fixed_range) {
+            // push down not in condition to storage engine
+            not_in_range.to_in_condition(_olap_filter, false);
+        }
+        *push_down = effect;
+    }
+    return Status::OK();
+}
+
+template <PrimitiveType T>
+Status VOlapScanNode::_normalize_is_null_predicate(VExpr* expr, VExprContext* 
expr_ctx,
+                                                   SlotDescriptor* slot, 
ColumnValueRange<T>& range,
+                                                   bool* push_down) {
+    if (TExprNodeType::FUNCTION_CALL == expr->node_type()) {
+        if (reinterpret_cast<VectorizedFnCall*>(expr)->fn().name.function_name 
== "is_null_pred") {
+            auto temp_range = 
ColumnValueRange<T>::create_empty_column_value_range(
+                    slot->type().precision, slot->type().scale);
+            temp_range.set_contain_null(true);
+            range.intersection(temp_range);
+            *push_down = true;
+        } else if 
(reinterpret_cast<VectorizedFnCall*>(expr)->fn().name.function_name ==
+                   "is_not_null_pred") {
+            auto temp_range = 
ColumnValueRange<T>::create_empty_column_value_range(
+                    slot->type().precision, slot->type().scale);
+            temp_range.set_contain_null(false);
+            range.intersection(temp_range);
+            *push_down = true;
+        }
+    }
+    return Status::OK();
+}
+
+template <PrimitiveType T>
+Status VOlapScanNode::_normalize_noneq_binary_predicate(VExpr* expr, 
VExprContext* expr_ctx,
+                                                        SlotDescriptor* slot,
+                                                        ColumnValueRange<T>& 
range,
+                                                        bool* push_down) {
+    if (TExprNodeType::BINARY_PRED == expr->node_type()) {
+        DCHECK(expr->children().size() == 2);
+
+        auto noneq_checker = [](const std::string& fn_name) {
+            return fn_name != "ne" && fn_name != "eq";
+        };
+        StringRef value;
+        int slot_ref_child = -1;
+        if 
(_should_push_down_binary_predicate(reinterpret_cast<VectorizedFnCall*>(expr), 
expr_ctx,
+                                               &value, &slot_ref_child, 
noneq_checker)) {
+            DCHECK(slot_ref_child >= 0);
+            std::string fn_name =

Review Comment:
   Use `const std::string&` here.



##########
be/src/exec/olap_utils.h:
##########
@@ -215,4 +218,23 @@ inline SQLFilterOp to_olap_filter_type(TExprOpcode::type 
type, bool opposite) {
     return FILTER_IN;
 }
 
+inline SQLFilterOp to_olap_filter_type(std::string& function_name, bool 
opposite) {

Review Comment:
   If `function_name` is not changed inside this function, declare it as `const std::string&`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to