This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 668188b  [improvement][vectorized] support es node predicate peel 
(#8174)
668188b is described below

commit 668188b91f5c2a9d27ad257fc409f1df11aa0c17
Author: Pxl <952130...@qq.com>
AuthorDate: Sat Feb 26 17:02:54 2022 +0800

    [improvement][vectorized] support es node predicate peel (#8174)
---
 be/src/exec/es/es_predicate.cpp                    |   2 +-
 be/src/exec/es/es_predicate.h                      |   4 +-
 be/src/exec/es/es_query_builder.cpp                | 121 ++++++++++-----------
 be/src/exec/es/es_query_builder.h                  |   1 +
 be/src/exec/es_http_scan_node.cpp                  |  24 +++-
 be/src/exec/es_http_scan_node.h                    |  10 +-
 be/src/exec/olap_scan_node.cpp                     |  58 ++--------
 be/src/exec/olap_scan_node.h                       |  14 +--
 be/src/exec/scan_node.cpp                          |  28 +++++
 be/src/exec/scan_node.h                            |   9 +-
 be/src/vec/CMakeLists.txt                          |   1 +
 be/src/vec/exec/ves_http_scanner.cpp               |   8 +-
 be/src/vec/exec/ves_http_scanner.h                 |  24 ++--
 be/src/vec/functions/function_convert_tz.h         |   2 +
 .../function_date_or_datetime_computation.h        |   2 +
 .../function_date_or_datetime_to_something.h       |   2 +
 .../function_date_or_datetime_to_string.h          |   2 +
 .../functions/function_datetime_string_to_string.h |   2 +
 .../{function_hash.h => function_fake.cpp}         |  22 +---
 ...{function_multi_same_args.h => function_fake.h} |  36 +++---
 be/src/vec/functions/function_grouping.h           |  10 +-
 be/src/vec/functions/function_hash.h               |   1 +
 be/src/vec/functions/function_ifnull.h             |  49 ++++-----
 be/src/vec/functions/function_multi_same_args.h    |   4 +-
 be/src/vec/functions/function_rpc.h                |   2 +
 be/src/vec/functions/function_string_to_string.h   |   2 +
 be/src/vec/functions/simple_function_factory.h     |   3 +
 be/src/vec/utils/util.hpp                          |  26 ++++-
 gensrc/script/doris_builtins_functions.py          |   2 +-
 29 files changed, 244 insertions(+), 227 deletions(-)

diff --git a/be/src/exec/es/es_predicate.cpp b/be/src/exec/es/es_predicate.cpp
index 3b8be66..7dab9ca 100644
--- a/be/src/exec/es/es_predicate.cpp
+++ b/be/src/exec/es/es_predicate.cpp
@@ -181,7 +181,7 @@ Status EsPredicate::build_disjuncts_list() {
 }
 
 // make sure to build by build_disjuncts_list
-const std::vector<ExtPredicate*>& EsPredicate::get_predicate_list() {
+const std::vector<ExtPredicate*>& EsPredicate::get_predicate_list() const {
     return _disjuncts;
 }
 
diff --git a/be/src/exec/es/es_predicate.h b/be/src/exec/es/es_predicate.h
index 825bc5e..826af0a 100644
--- a/be/src/exec/es/es_predicate.h
+++ b/be/src/exec/es/es_predicate.h
@@ -141,10 +141,10 @@ class EsPredicate {
 public:
     EsPredicate(ExprContext* context, const TupleDescriptor* tuple_desc, 
ObjectPool* pool);
     ~EsPredicate();
-    const std::vector<ExtPredicate*>& get_predicate_list();
+    const std::vector<ExtPredicate*>& get_predicate_list() const;
     Status build_disjuncts_list();
     // public for tests
-    EsPredicate(const std::vector<ExtPredicate*>& all_predicates) { _disjuncts 
= all_predicates; };
+    EsPredicate(const std::vector<ExtPredicate*>& all_predicates) { _disjuncts 
= all_predicates; }
 
     Status get_es_query_status() { return _es_query_status; }
 
diff --git a/be/src/exec/es/es_query_builder.cpp 
b/be/src/exec/es/es_query_builder.cpp
index 5f8dcfd..1f9488e 100644
--- a/be/src/exec/es/es_query_builder.cpp
+++ b/be/src/exec/es/es_query_builder.cpp
@@ -54,19 +54,19 @@ TermQueryBuilder::TermQueryBuilder(const std::string& 
field, const std::string&
 
 TermQueryBuilder::TermQueryBuilder(const ExtBinaryPredicate& binary_predicate)
         : _field(binary_predicate.col.name), _match_none(false) {
-        if (binary_predicate.col.type.type == PrimitiveType::TYPE_BOOLEAN) {
-            int val = atoi(binary_predicate.value.to_string().c_str());
-            if (val == 1) {
-                _term = std::string("true");
-            } else if (val == 0){
-                _term = std::string("false");
-            } else {
-                // keep semantic consistent with mysql
-                _match_none = true;
-            }
+    if (binary_predicate.col.type.type == PrimitiveType::TYPE_BOOLEAN) {
+        int val = atoi(binary_predicate.value.to_string().c_str());
+        if (val == 1) {
+            _term = std::string("true");
+        } else if (val == 0) {
+            _term = std::string("false");
         } else {
-            _term = binary_predicate.value.to_string();
+            // keep semantic consistent with mysql
+            _match_none = true;
         }
+    } else {
+        _term = binary_predicate.value.to_string();
+    }
 }
 
 void TermQueryBuilder::to_json(rapidjson::Document* document, 
rapidjson::Value* query) {
@@ -82,7 +82,6 @@ void TermQueryBuilder::to_json(rapidjson::Document* document, 
rapidjson::Value*
         // this would only appear `bool` column's predicate (a = 2)
         query->AddMember("match_none", term_node, allocator);
     }
-
 }
 
 RangeQueryBuilder::RangeQueryBuilder(const ExtBinaryPredicate& range_predicate)
@@ -381,66 +380,62 @@ Status BooleanQueryBuilder::check_es_query(const 
ExtFunction& extFunction) {
 
 void BooleanQueryBuilder::validate(const std::vector<EsPredicate*>& 
espredicates,
                                    std::vector<bool>* result) {
-    int conjunct_size = espredicates.size();
-    result->reserve(conjunct_size);
     for (auto espredicate : espredicates) {
-        bool flag = true;
-        for (auto predicate : espredicate->get_predicate_list()) {
-            switch (predicate->node_type) {
-            case TExprNodeType::BINARY_PRED: {
-                ExtBinaryPredicate* binary_predicate = 
(ExtBinaryPredicate*)predicate;
-                TExprOpcode::type op = binary_predicate->op;
-                if (op != TExprOpcode::EQ && op != TExprOpcode::NE && op != 
TExprOpcode::LT &&
-                    op != TExprOpcode::LE && op != TExprOpcode::GT && op != 
TExprOpcode::GE) {
-                    flag = false;
-                }
-                break;
-            }
-            case TExprNodeType::COMPOUND_PRED: {
-                ExtCompPredicates* compound_predicates = 
(ExtCompPredicates*)predicate;
-                if (compound_predicates->op == TExprOpcode::COMPOUND_AND) {
-                    std::vector<bool> list;
-                    validate(compound_predicates->conjuncts, &list);
-                    for (int i = list.size() - 1; i >= 0; i--) {
-                        if (!list[i]) {
-                            flag = false;
-                            break;
-                        }
-                    }
-                } else {
-                    // reserved for compound_not
-                    flag = false;
-                }
-                break;
+        result->push_back(validate(espredicate));
+    }
+}
+
+bool BooleanQueryBuilder::validate(const EsPredicate* espredicate) {
+    for (auto predicate : espredicate->get_predicate_list()) {
+        switch (predicate->node_type) {
+        case TExprNodeType::BINARY_PRED: {
+            ExtBinaryPredicate* binary_predicate = 
(ExtBinaryPredicate*)predicate;
+            TExprOpcode::type op = binary_predicate->op;
+            if (op != TExprOpcode::EQ && op != TExprOpcode::NE && op != 
TExprOpcode::LT &&
+                op != TExprOpcode::LE && op != TExprOpcode::GT && op != 
TExprOpcode::GE) {
+                return false;
             }
-            case TExprNodeType::LIKE_PRED:
-            case TExprNodeType::IS_NULL_PRED:
-            case TExprNodeType::IN_PRED: {
-                break;
+            break;
+        }
+        case TExprNodeType::COMPOUND_PRED: {
+            ExtCompPredicates* compound_predicates = 
(ExtCompPredicates*)predicate;
+            if (compound_predicates->op != TExprOpcode::COMPOUND_AND) {
+                // reserved for compound_not
+                return false;
             }
-            case TExprNodeType::FUNCTION_CALL: {
-                ExtFunction* function_predicate = (ExtFunction*)predicate;
-                if ("esquery" == function_predicate->func_name) {
-                    Status st = check_es_query(*function_predicate);
-                    if (!st.ok()) {
-                        flag = false;
-                    }
-                } else {
-                    flag = false;
+            std::vector<bool> list;
+            validate(compound_predicates->conjuncts, &list);
+            for (int i = list.size() - 1; i >= 0; i--) {
+                if (!list[i]) {
+                    return false;
                 }
-                break;
-            }
-            default: {
-                flag = false;
-                break;
             }
+            break;
+        }
+        case TExprNodeType::LIKE_PRED:
+        case TExprNodeType::IS_NULL_PRED:
+        case TExprNodeType::IN_PRED: {
+            break;
+        }
+        case TExprNodeType::FUNCTION_CALL: {
+            ExtFunction* function_predicate = (ExtFunction*)predicate;
+            if ("esquery" != function_predicate->func_name) {
+                return false;
             }
-            if (!flag) {
-                break;
+            Status st = check_es_query(*function_predicate);
+            if (!st.ok()) {
+                return false;
             }
+            break;
+        }
+        default: {
+            return false;
+            break;
+        }
         }
-        result->push_back(flag);
     }
+
+    return true;
 }
 
 void BooleanQueryBuilder::to_query(const std::vector<EsPredicate*>& predicates,
diff --git a/be/src/exec/es/es_query_builder.h 
b/be/src/exec/es/es_query_builder.h
index c471380..6a5d16d 100644
--- a/be/src/exec/es/es_query_builder.h
+++ b/be/src/exec/es/es_query_builder.h
@@ -119,6 +119,7 @@ public:
     static Status check_es_query(const ExtFunction& extFunction);
     // decide which predicate can process
     static void validate(const std::vector<EsPredicate*>& espredicates, 
std::vector<bool>* result);
+    static bool validate(const EsPredicate* espredicate);
 
 private:
     // add child query
diff --git a/be/src/exec/es_http_scan_node.cpp 
b/be/src/exec/es_http_scan_node.cpp
index 7b67486..64b9792 100644
--- a/be/src/exec/es_http_scan_node.cpp
+++ b/be/src/exec/es_http_scan_node.cpp
@@ -68,6 +68,9 @@ Status EsHttpScanNode::prepare(RuntimeState* state) {
     VLOG_QUERY << "EsHttpScanNode prepare";
     RETURN_IF_ERROR(ScanNode::prepare(state));
 
+    _scanner_profile.reset(new RuntimeProfile("EsHttpScanNode"));
+    runtime_profile()->add_child(_scanner_profile.get(), true, nullptr);
+
     _runtime_state = state;
     _tuple_desc = state->desc_tbl().get_tuple_descriptor(_tuple_id);
     if (_tuple_desc == nullptr) {
@@ -92,14 +95,20 @@ Status EsHttpScanNode::prepare(RuntimeState* state) {
 // build predicate
 Status EsHttpScanNode::build_conjuncts_list() {
     Status status = Status::OK();
+    _conjunct_to_predicate.resize(_conjunct_ctxs.size());
+
     for (int i = 0; i < _conjunct_ctxs.size(); ++i) {
         EsPredicate* predicate = _pool->add(new EsPredicate(_conjunct_ctxs[i], 
_tuple_desc, _pool));
         predicate->set_field_context(_fields_context);
         status = predicate->build_disjuncts_list();
         if (status.ok()) {
-            _predicates.push_back(predicate);
+            _conjunct_to_predicate[i] = _predicate_to_conjunct.size();
             _predicate_to_conjunct.push_back(i);
+
+            _predicates.push_back(predicate);
         } else {
+            _conjunct_to_predicate[i] = -1;
+
             VLOG_CRITICAL << status.get_error_msg();
             status = predicate->get_es_query_status();
             if (!status.ok()) {
@@ -133,6 +142,7 @@ Status EsHttpScanNode::open(RuntimeState* state) {
     // remove those predicates which ES cannot support
     std::vector<bool> list;
     BooleanQueryBuilder::validate(_predicates, &list);
+
     DCHECK(list.size() == _predicate_to_conjunct.size());
     for (int i = list.size() - 1; i >= 0; i--) {
         if (!list[i]) {
@@ -148,6 +158,12 @@ Status EsHttpScanNode::open(RuntimeState* state) {
         _conjunct_ctxs.erase(_conjunct_ctxs.begin() + conjunct_index);
     }
 
+    auto checker = [&](int index) {
+        return _conjunct_to_predicate[index] != -1 && 
list[_conjunct_to_predicate[index]];
+    };
+    std::string vconjunct_information = _peel_pushed_vconjunct(checker);
+    _scanner_profile->add_info_string("VconjunctExprTree", 
vconjunct_information);
+
     RETURN_IF_ERROR(start_scanners());
 
     return Status::OK();
@@ -443,9 +459,9 @@ void EsHttpScanNode::scanner_worker(int start_idx, int 
length, std::promise<Stat
                                   scanner_expr_ctxs, &counter, 
doc_value_mode));
         status = scanner_scan(std::move(scanner), scanner_expr_ctxs, &counter);
     } else {
-        std::unique_ptr<VEsHttpScanner> scanner(
-                new VEsHttpScanner(_runtime_state, runtime_profile(), 
_tuple_id, properties,
-                                  scanner_expr_ctxs, &counter, 
doc_value_mode));
+        std::unique_ptr<vectorized::VEsHttpScanner> scanner(new 
vectorized::VEsHttpScanner(
+                _runtime_state, runtime_profile(), _tuple_id, properties, 
scanner_expr_ctxs,
+                &counter, doc_value_mode));
         status = scanner_scan(std::move(scanner));
     }
     if (!status.ok()) {
diff --git a/be/src/exec/es_http_scan_node.h b/be/src/exec/es_http_scan_node.h
index 2564294..0fc9fac 100644
--- a/be/src/exec/es_http_scan_node.h
+++ b/be/src/exec/es_http_scan_node.h
@@ -15,8 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#ifndef BE_EXEC_ES_HTTP_SCAN_NODE_H
-#define BE_EXEC_ES_HTTP_SCAN_NODE_H
+#pragma once
 
 #include <atomic>
 #include <condition_variable>
@@ -99,7 +98,7 @@ private:
     Status scanner_scan(std::unique_ptr<EsHttpScanner> scanner,
                         const std::vector<ExprContext*>& conjunct_ctxs, 
EsScanCounter* counter);
 
-    virtual Status scanner_scan(std::unique_ptr<VEsHttpScanner> scanner) {
+    virtual Status scanner_scan(std::unique_ptr<vectorized::VEsHttpScanner> 
scanner) {
         return Status::NotSupported("vectorized scan in EsHttpScanNode is not 
supported!");
     };
 
@@ -117,8 +116,9 @@ private:
     std::vector<EsPredicate*> _predicates;
 
     std::vector<int> _predicate_to_conjunct;
+    std::vector<int> _conjunct_to_predicate;
+
+    std::unique_ptr<RuntimeProfile> _scanner_profile;
 };
 
 } // namespace doris
-
-#endif
diff --git a/be/src/exec/olap_scan_node.cpp b/be/src/exec/olap_scan_node.cpp
index f1f0169..af26ae1 100644
--- a/be/src/exec/olap_scan_node.cpp
+++ b/be/src/exec/olap_scan_node.cpp
@@ -83,10 +83,8 @@ Status OlapScanNode::init(const TPlanNode& tnode, 
RuntimeState* state) {
     for (int i = 0; i < filter_size; ++i) {
         IRuntimeFilter* runtime_filter = nullptr;
         const auto& filter_desc = _runtime_filter_descs[i];
-        
RETURN_IF_ERROR(state->runtime_filter_mgr()->regist_filter(RuntimeFilterRole::CONSUMER,
-                                                                   filter_desc,
-                                                                   
state->query_options(),
-                                                                   id()));
+        RETURN_IF_ERROR(state->runtime_filter_mgr()->regist_filter(
+                RuntimeFilterRole::CONSUMER, filter_desc, 
state->query_options(), id()));
         
RETURN_IF_ERROR(state->runtime_filter_mgr()->get_consume_filter(filter_desc.filter_id,
                                                                         
&runtime_filter));
 
@@ -162,7 +160,7 @@ void OlapScanNode::_init_counter(RuntimeState* state) {
     _olap_wait_batch_queue_timer = ADD_TIMER(_runtime_profile, 
"BatchQueueWaitTime");
 
     // for the purpose of debugging or profiling
-    for (int i = 0; i < 
sizeof(_general_debug_timer)/sizeof(*_general_debug_timer); ++i) {
+    for (int i = 0; i < GENERAL_DEBUG_COUNT; ++i) {
         char name[64];
         snprintf(name, sizeof(name), "GeneralDebugTimer%d", i);
         _general_debug_timer[i] = ADD_TIMER(_segment_profile, name);
@@ -529,6 +527,7 @@ void OlapScanNode::remove_pushed_conjuncts(RuntimeState* 
state) {
             iter->second->runtimefilter->set_push_down_profile();
         }
     }
+
     // set vconjunct_ctx is empty, if all conjunct
     if (_direct_conjunct_size == 0) {
         if (_vconjunct_ctx_ptr.get() != nullptr) {
@@ -536,8 +535,11 @@ void OlapScanNode::remove_pushed_conjuncts(RuntimeState* 
state) {
             _vconjunct_ctx_ptr = nullptr;
         }
     }
+
     // filter idle conjunct in vexpr_contexts
-    _peel_pushed_conjuncts();
+    auto checker = [&](int index) { return 
_pushed_conjuncts_index.count(index); };
+    std::string vconjunct_information = _peel_pushed_vconjunct(checker);
+    _scanner_profile->add_info_string("VconjunctExprTree", 
vconjunct_information);
 }
 
 void OlapScanNode::eval_const_conjuncts() {
@@ -1679,48 +1681,4 @@ Status OlapScanNode::add_one_batch(RowBatch* row_batch) {
     _row_batch_added_cv.notify_one();
     return Status::OK();
 }
-
-
-vectorized::VExpr* OlapScanNode::_dfs_peel_conjunct(vectorized::VExpr* expr, 
int& leaf_index) {
-    static constexpr auto is_leaf = [](vectorized::VExpr* expr) { return 
!expr->is_and_expr(); };
-
-    if (is_leaf(expr)) {
-        return _pushed_conjuncts_index.count(leaf_index++) ? nullptr : expr;
-    } else {
-        vectorized::VExpr* left_child = 
_dfs_peel_conjunct(expr->children()[0], leaf_index);
-        vectorized::VExpr* right_child = 
_dfs_peel_conjunct(expr->children()[1], leaf_index);
-
-        if (left_child != nullptr && right_child != nullptr) {
-            expr->set_children({left_child, right_child});
-            return expr;
-        }
-        // here do not close Expr* now
-        return left_child != nullptr ? left_child : right_child;
-    }
-}
-
-// This function is used to remove pushed expr in expr tree.
-// It relies on the logic of function convertConjunctsToAndCompoundPredicate() 
of FE splicing expr.
-// It requires FE to satisfy each splicing with 'and' expr, and spliced from 
left to right, in order.
-// Expr tree specific forms do not require requirements.
-void OlapScanNode::_peel_pushed_conjuncts() {
-    if (_vconjunct_ctx_ptr.get() == nullptr) return;
-
-    int leaf_index = 0;
-    vectorized::VExpr* conjunct_expr_root = 
(*_vconjunct_ctx_ptr.get())->root();
-
-    if (conjunct_expr_root != nullptr) {
-        vectorized::VExpr* new_conjunct_expr_root =
-                _dfs_peel_conjunct(conjunct_expr_root, leaf_index);
-        if (new_conjunct_expr_root == nullptr) {
-            _vconjunct_ctx_ptr = nullptr;
-            _scanner_profile->add_info_string("VconjunctExprTree", "null");
-        } else {
-            (*_vconjunct_ctx_ptr.get())->set_root(new_conjunct_expr_root);
-            _scanner_profile->add_info_string("VconjunctExprTree",
-                                              
new_conjunct_expr_root->debug_string());
-        }
-    }
-}
-
 } // namespace doris
diff --git a/be/src/exec/olap_scan_node.h b/be/src/exec/olap_scan_node.h
index 82e98d5..5ebd647 100644
--- a/be/src/exec/olap_scan_node.h
+++ b/be/src/exec/olap_scan_node.h
@@ -15,8 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#ifndef DORIS_BE_SRC_QUERY_EXEC_OLAP_SCAN_NODE_H
-#define DORIS_BE_SRC_QUERY_EXEC_OLAP_SCAN_NODE_H
+#pragma once
 
 #include <atomic>
 #include <condition_variable>
@@ -57,7 +56,7 @@ public:
     Status get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) 
override;
     Status collect_query_statistics(QueryStatistics* statistics) override;
     Status close(RuntimeState* state) override;
-    Status set_scan_ranges(const std::vector <TScanRangeParams>& scan_ranges) 
override;
+    Status set_scan_ranges(const std::vector<TScanRangeParams>& scan_ranges) 
override;
     inline void set_no_agg_finalize() { _need_agg_finalize = false; }
 
 protected:
@@ -137,7 +136,9 @@ protected:
     // Write debug string of this into out.
     void debug_string(int indentation_level, std::stringstream* out) const 
override {}
 
-    const std::vector<TRuntimeFilterDesc>& runtime_filter_descs() const { 
return _runtime_filter_descs; }
+    const std::vector<TRuntimeFilterDesc>& runtime_filter_descs() const {
+        return _runtime_filter_descs;
+    }
 
     void _init_counter(RuntimeState* state);
     // OLAP_SCAN_NODE profile layering: OLAP_SCAN_NODE, OlapScanner, and 
SegmentIterator
@@ -328,11 +329,6 @@ protected:
 
     // for debugging or profiling, record any info as you want
     RuntimeProfile::Counter* _general_debug_timer[GENERAL_DEBUG_COUNT] = {};
-
-    vectorized::VExpr* _dfs_peel_conjunct(vectorized::VExpr* expr, int& 
leaf_index);
-    void _peel_pushed_conjuncts(); // remove pushed expr from conjunct tree
 };
 
 } // namespace doris
-
-#endif
diff --git a/be/src/exec/scan_node.cpp b/be/src/exec/scan_node.cpp
index f292ed3..9f3e9ca 100644
--- a/be/src/exec/scan_node.cpp
+++ b/be/src/exec/scan_node.cpp
@@ -17,6 +17,8 @@
 
 #include "exec/scan_node.h"
 
+#include "vec/utils/util.hpp"
+
 namespace doris {
 
 const std::string ScanNode::_s_bytes_read_counter = "BytesRead";
@@ -40,4 +42,30 @@ Status ScanNode::prepare(RuntimeState* state) {
     return Status::OK();
 }
 
+// This function removes pushed exprs from the conjunct expr tree.
+// It relies on the way FE's convertConjunctsToAndCompoundPredicate() 
splices exprs together.
+// FE must join the exprs with 'and' exprs only, splicing them from 
left to right, in order.
+// No other requirement is placed on the specific form of the expr tree.
+std::string ScanNode::_peel_pushed_vconjunct(const std::function<bool(int)>& 
checker) {
+    if (_vconjunct_ctx_ptr.get() == nullptr) {
+        return "null";
+    }
+
+    int leaf_index = 0;
+    vectorized::VExpr* conjunct_expr_root = 
(*_vconjunct_ctx_ptr.get())->root();
+
+    if (conjunct_expr_root != nullptr) {
+        vectorized::VExpr* new_conjunct_expr_root = 
vectorized::VectorizedUtils::dfs_peel_conjunct(
+                conjunct_expr_root, leaf_index, checker);
+        if (new_conjunct_expr_root == nullptr) {
+            _vconjunct_ctx_ptr = nullptr;
+        } else {
+            (*_vconjunct_ctx_ptr.get())->set_root(new_conjunct_expr_root);
+            return new_conjunct_expr_root->debug_string();
+        }
+    }
+
+    return "null";
+}
+
 } // namespace doris
diff --git a/be/src/exec/scan_node.h b/be/src/exec/scan_node.h
index bd09991..9808b87 100644
--- a/be/src/exec/scan_node.h
+++ b/be/src/exec/scan_node.h
@@ -15,14 +15,14 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#ifndef DORIS_BE_SRC_QUERY_EXEC_SCAN_NODE_H
-#define DORIS_BE_SRC_QUERY_EXEC_SCAN_NODE_H
+#pragma once
 
 #include <string>
 
 #include "exec/exec_node.h"
 #include "gen_cpp/PaloInternalService_types.h"
 #include "util/runtime_profile.h"
+#include "vec/exprs/vexpr.h"
 
 namespace doris {
 
@@ -90,6 +90,9 @@ public:
     static const std::string _s_num_disks_accessed_counter;
 
 protected:
+    std::string _peel_pushed_vconjunct(
+            const std::function<bool(int)>& checker); // remove pushed expr 
from conjunct tree
+
     RuntimeProfile::Counter* _bytes_read_counter; // # bytes read from the 
scanner
     // # rows/tuples read from the scanner (including those discarded by 
eval_conjuncts())
     RuntimeProfile::Counter* _rows_read_counter;
@@ -99,5 +102,3 @@ protected:
 };
 
 } // namespace doris
-
-#endif
diff --git a/be/src/vec/CMakeLists.txt b/be/src/vec/CMakeLists.txt
index d9ec309..91f65f9 100644
--- a/be/src/vec/CMakeLists.txt
+++ b/be/src/vec/CMakeLists.txt
@@ -151,6 +151,7 @@ set(VEC_FILES
   functions/function_rpc.cpp
   functions/function_convert_tz.cpp
   functions/least_greast.cpp
+  functions/function_fake.cpp
   olap/vgeneric_iterators.cpp
   olap/vcollect_iterator.cpp
   olap/block_reader.cpp
diff --git a/be/src/vec/exec/ves_http_scanner.cpp 
b/be/src/vec/exec/ves_http_scanner.cpp
index 6087257..c06da06 100644
--- a/be/src/vec/exec/ves_http_scanner.cpp
+++ b/be/src/vec/exec/ves_http_scanner.cpp
@@ -17,7 +17,7 @@
 
 #include "vec/exec/ves_http_scanner.h"
 
-namespace doris {
+namespace doris::vectorized {
 
 VEsHttpScanner::~VEsHttpScanner() {
     close();
@@ -43,8 +43,8 @@ Status 
VEsHttpScanner::get_next(std::vector<vectorized::MutableColumnPtr>& colum
 
         COUNTER_UPDATE(_rows_read_counter, 1);
         SCOPED_TIMER(_materialize_timer);
-        RETURN_IF_ERROR(_es_scroll_parser->fill_columns(_tuple_desc, columns, 
tuple_pool, &_line_eof,
-                                                      docvalue_context));
+        RETURN_IF_ERROR(_es_scroll_parser->fill_columns(_tuple_desc, columns, 
tuple_pool,
+                                                        &_line_eof, 
docvalue_context));
         if (!_line_eof) {
             break;
         }
@@ -53,4 +53,4 @@ Status 
VEsHttpScanner::get_next(std::vector<vectorized::MutableColumnPtr>& colum
     return Status::OK();
 }
 
-} // namespace doris
+} // namespace doris::vectorized
diff --git a/be/src/vec/exec/ves_http_scanner.h 
b/be/src/vec/exec/ves_http_scanner.h
index 3692a61..5fa83db 100644
--- a/be/src/vec/exec/ves_http_scanner.h
+++ b/be/src/vec/exec/ves_http_scanner.h
@@ -16,28 +16,24 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#ifndef BE_EXEC_VES_HTTP_SCANNER_H
-#define BE_EXEC_VES_HTTP_SCANNER_H
+#pragma once
 
 #include <exec/es_http_scanner.h>
 
-namespace doris {
+namespace doris::vectorized {
 
 class VEsHttpScanner : public EsHttpScanner {
 public:
     VEsHttpScanner(RuntimeState* state, RuntimeProfile* profile, TupleId 
tuple_id,
-                  const std::map<std::string, std::string>& properties,
-                  const std::vector<ExprContext*>& conjunct_ctxs, 
EsScanCounter* counter,
-                  bool doc_value_mode): EsHttpScanner(state, profile, 
tuple_id, properties,
-                            conjunct_ctxs, counter, doc_value_mode) {};
+                   const std::map<std::string, std::string>& properties,
+                   const std::vector<ExprContext*>& conjunct_ctxs, 
EsScanCounter* counter,
+                   bool doc_value_mode)
+            : EsHttpScanner(state, profile, tuple_id, properties, 
conjunct_ctxs, counter,
+                            doc_value_mode) {};
     ~VEsHttpScanner();
 
-    Status get_next(std::vector<vectorized::MutableColumnPtr>& columns,
-                    MemPool* tuple_pool, bool* eof,
-                    const std::map<std::string, std::string>& 
docvalue_context);
-
+    Status get_next(std::vector<vectorized::MutableColumnPtr>& columns, 
MemPool* tuple_pool,
+                    bool* eof, const std::map<std::string, std::string>& 
docvalue_context);
 };
 
-} // namespace doris
-
-#endif
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/function_convert_tz.h 
b/be/src/vec/functions/function_convert_tz.h
index 7af61b3..f96031c 100644
--- a/be/src/vec/functions/function_convert_tz.h
+++ b/be/src/vec/functions/function_convert_tz.h
@@ -15,6 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 
+#pragma once
+
 #include "vec/columns/columns_number.h"
 #include "vec/common/string_ref.h"
 #include "vec/core/types.h"
diff --git a/be/src/vec/functions/function_date_or_datetime_computation.h 
b/be/src/vec/functions/function_date_or_datetime_computation.h
index 7fdffef..5a106fd 100644
--- a/be/src/vec/functions/function_date_or_datetime_computation.h
+++ b/be/src/vec/functions/function_date_or_datetime_computation.h
@@ -15,6 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 
+#pragma once
+
 #include "common/logging.h"
 #include "fmt/format.h"
 #include "runtime/datetime_value.h"
diff --git a/be/src/vec/functions/function_date_or_datetime_to_something.h 
b/be/src/vec/functions/function_date_or_datetime_to_something.h
index efef7c3..1440126 100644
--- a/be/src/vec/functions/function_date_or_datetime_to_something.h
+++ b/be/src/vec/functions/function_date_or_datetime_to_something.h
@@ -18,6 +18,8 @@
 // 
https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionDateOrDatetimeToSomething.h
 // and modified by Doris
 
+#pragma once
+
 #include "vec/data_types/data_type_date.h"
 #include "vec/data_types/data_type_date_time.h"
 #include "vec/functions/date_time_transforms.h"
diff --git a/be/src/vec/functions/function_date_or_datetime_to_string.h 
b/be/src/vec/functions/function_date_or_datetime_to_string.h
index 42922c8..15cc226 100644
--- a/be/src/vec/functions/function_date_or_datetime_to_string.h
+++ b/be/src/vec/functions/function_date_or_datetime_to_string.h
@@ -18,6 +18,8 @@
 // 
https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionDateOrDatetimeToString.h
 // and modified by Doris
 
+#pragma once
+
 #include "vec/data_types/data_type_date.h"
 #include "vec/data_types/data_type_date_time.h"
 #include "vec/data_types/data_type_string.h"
diff --git a/be/src/vec/functions/function_datetime_string_to_string.h 
b/be/src/vec/functions/function_datetime_string_to_string.h
index b63bfcd..6502d0f 100644
--- a/be/src/vec/functions/function_datetime_string_to_string.h
+++ b/be/src/vec/functions/function_datetime_string_to_string.h
@@ -15,6 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 
+#pragma once
+
 #include "vec/columns/column_nullable.h"
 #include "vec/columns/columns_number.h"
 #include "vec/data_types/data_type_date.h"
diff --git a/be/src/vec/functions/function_hash.h 
b/be/src/vec/functions/function_fake.cpp
similarity index 58%
copy from be/src/vec/functions/function_hash.h
copy to be/src/vec/functions/function_fake.cpp
index a908c36..0aa9bf0 100644
--- a/be/src/vec/functions/function_hash.h
+++ b/be/src/vec/functions/function_fake.cpp
@@ -14,27 +14,13 @@
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.
-// This file is copied from
-// 
https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionHash.h
-// and modified by Doris
 
-#pragma once
-#include <utility>
-
-#include "vec/columns/column_string.h"
-#include "vec/columns/column_vector.h"
-#include "vec/common/bit_cast.h"
-#include "vec/common/hash_table/hash.h"
-#include "vec/data_types/data_type.h"
-#include "vec/data_types/data_type_number.h"
-#include "vec/functions/function.h"
-#include "vec/functions/function_helpers.h"
+#include "vec/functions/function_fake.h"
 
 namespace doris::vectorized {
 
-struct IntHash64Impl {
-    using ReturnType = UInt64;
+void register_function_fake(SimpleFunctionFactory& factory) {
+    factory.register_function<FunctionFake<FunctionEsqueryImpl>>();
+}
 
-    static UInt64 apply(UInt64 x) { return int_hash64(x ^ 
0x4CF2D2BAAE6DA887ULL); }
-};
 } // namespace doris::vectorized
diff --git a/be/src/vec/functions/function_multi_same_args.h 
b/be/src/vec/functions/function_fake.h
similarity index 72%
copy from be/src/vec/functions/function_multi_same_args.h
copy to be/src/vec/functions/function_fake.h
index 056544e..519fb08 100644
--- a/be/src/vec/functions/function_multi_same_args.h
+++ b/be/src/vec/functions/function_fake.h
@@ -15,41 +15,47 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include "udf/udf.h"
-#include "vec/data_types/get_least_supertype.h"
-#include "vec/functions/function_helpers.h"
+#pragma once
+
+#include "common/status.h"
+#include "vec/core/types.h"
+#include "vec/data_types/data_type_number.h"
 #include "vec/functions/simple_function_factory.h"
-#include "vec/utils/template_helpers.hpp"
 #include "vec/utils/util.hpp"
 
 namespace doris::vectorized {
 
+struct FunctionEsqueryImpl {
+    static constexpr auto name = "esquery";
+    static DataTypePtr get_return_type_impl(const DataTypes& arguments) {
+        return std::make_shared<DataTypeUInt8>();
+    }
+};
+
+// FunctionFake is used for function call exprs that only work at the prepare/open 
phase; it does not support execute().
 template <typename Impl>
-class FunctionMultiSameArgs : public IFunction {
+class FunctionFake : public IFunction {
 public:
     static constexpr auto name = Impl::name;
 
-    static FunctionPtr create() { return 
std::make_shared<FunctionMultiSameArgs>(); }
+    static FunctionPtr create() { return std::make_shared<FunctionFake>(); }
 
     String get_name() const override { return name; }
 
-    bool use_default_implementation_for_constants() const override { return 
true; }
-
-    bool use_default_implementation_for_nulls() const override { return true; }
+    size_t get_number_of_arguments() const override { return 0; }
 
     bool is_variadic() const override { return true; }
 
-    size_t get_number_of_arguments() const override { return 0; }
-
     DataTypePtr get_return_type_impl(const DataTypes& arguments) const 
override {
         return Impl::get_return_type_impl(arguments);
     }
 
+    bool use_default_implementation_for_nulls() const override { return true; }
+
     Status execute_impl(FunctionContext* context, Block& block, const 
ColumnNumbers& arguments,
                         size_t result, size_t input_rows_count) override {
-        DCHECK_GE(arguments.size(), 1);
-        block.replace_by_position(result, Impl::execute(block, arguments, 
input_rows_count));
-        return Status::OK();
+        return Status::NotSupported(fmt::format("Fake function {} do not 
support execute", name));
     }
 };
-};
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/function_grouping.h 
b/be/src/vec/functions/function_grouping.h
index f8ea725..01cfe49 100644
--- a/be/src/vec/functions/function_grouping.h
+++ b/be/src/vec/functions/function_grouping.h
@@ -15,14 +15,13 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#ifndef DORIS_FUNCTION_GROUPING_H
-#define DORIS_FUNCTION_GROUPING_H
+#pragma once
 
-#include "vec/functions/simple_function_factory.h"
 #include "vec/columns/column_nullable.h"
+#include "vec/data_types/get_least_supertype.h"
 #include "vec/functions/function_helpers.h"
+#include "vec/functions/simple_function_factory.h"
 #include "vec/utils/util.hpp"
-#include "vec/data_types/get_least_supertype.h"
 
 namespace doris::vectorized {
 
@@ -64,5 +63,4 @@ public:
 
     String get_name() const override { return name; }
 };
-}
-#endif //DORIS_FUNCTION_GROUPING_H
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/function_hash.h 
b/be/src/vec/functions/function_hash.h
index a908c36..7ea034b 100644
--- a/be/src/vec/functions/function_hash.h
+++ b/be/src/vec/functions/function_hash.h
@@ -19,6 +19,7 @@
 // and modified by Doris
 
 #pragma once
+
 #include <utility>
 
 #include "vec/columns/column_string.h"
diff --git a/be/src/vec/functions/function_ifnull.h 
b/be/src/vec/functions/function_ifnull.h
index 26fe18c..b662764 100644
--- a/be/src/vec/functions/function_ifnull.h
+++ b/be/src/vec/functions/function_ifnull.h
@@ -18,15 +18,14 @@
 // https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/Ifnull.h
 // and modified by Doris
 
-#ifndef DORIS_FUNCTION_IFNULL_H
-#define DORIS_FUNCTION_IFNULL_H
+#pragma once
 
-#include "vec/functions/simple_function_factory.h"
 #include "vec/columns/column_nullable.h"
+#include "vec/data_types/get_least_supertype.h"
 #include "vec/functions/function_helpers.h"
-#include "vec/utils/util.hpp"
 #include "vec/functions/function_string.h"
-#include "vec/data_types/get_least_supertype.h"
+#include "vec/functions/simple_function_factory.h"
+#include "vec/utils/util.hpp"
 
 namespace doris::vectorized {
 class FunctionIfNull : public IFunction {
@@ -59,43 +58,35 @@ public:
             return Status::OK();
         }
 
-        ColumnWithTypeAndName null_column_arg0 {
-            nullptr, std::make_shared<DataTypeUInt8>(),""
-        };
-        ColumnWithTypeAndName nested_column_arg0 {
-            nullptr, col_left.type, ""
-        };
+        ColumnWithTypeAndName null_column_arg0 {nullptr, 
std::make_shared<DataTypeUInt8>(), ""};
+        ColumnWithTypeAndName nested_column_arg0 {nullptr, col_left.type, ""};
 
         /// implement isnull(col_left) logic
         if (auto* nullable = 
check_and_get_column<ColumnNullable>(*col_left.column)) {
             null_column_arg0.column = nullable->get_null_map_column_ptr();
             nested_column_arg0.column = nullable->get_nested_column_ptr();
-            nested_column_arg0.type = reinterpret_cast<const 
DataTypeNullable*>(
-                    nested_column_arg0.type.get())->get_nested_type();
+            nested_column_arg0.type =
+                    reinterpret_cast<const 
DataTypeNullable*>(nested_column_arg0.type.get())
+                            ->get_nested_type();
         } else {
             block.get_by_position(result).column = col_left.column;
             return Status::OK();
         }
-        const ColumnsWithTypeAndName if_columns
-        {
-            null_column_arg0,
-            block.get_by_position(arguments[1]),
-            nested_column_arg0
-        };
+        const ColumnsWithTypeAndName if_columns {
+                null_column_arg0, block.get_by_position(arguments[1]), 
nested_column_arg0};
 
-        Block temporary_block(
-                {
-                        null_column_arg0,
-                        block.get_by_position(arguments[1]),
-                        nested_column_arg0,
-                        block.get_by_position(result),
-                });
+        Block temporary_block({
+                null_column_arg0,
+                block.get_by_position(arguments[1]),
+                nested_column_arg0,
+                block.get_by_position(result),
+        });
 
-        auto func_if = SimpleFunctionFactory::instance().get_function("if", 
if_columns, block.get_by_position(result).type);
+        auto func_if = SimpleFunctionFactory::instance().get_function(
+                "if", if_columns, block.get_by_position(result).type);
         func_if->execute(context, temporary_block, {0, 1, 2}, 3, 
input_rows_count);
         block.get_by_position(result).column = 
temporary_block.get_by_position(3).column;
         return Status::OK();
     }
 };
-}
-#endif //DORIS_FUNCTION_IFNULL_H
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/function_multi_same_args.h 
b/be/src/vec/functions/function_multi_same_args.h
index 056544e..95ecd1d 100644
--- a/be/src/vec/functions/function_multi_same_args.h
+++ b/be/src/vec/functions/function_multi_same_args.h
@@ -15,6 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 
+#pragma once
+
 #include "udf/udf.h"
 #include "vec/data_types/get_least_supertype.h"
 #include "vec/functions/function_helpers.h"
@@ -52,4 +54,4 @@ public:
         return Status::OK();
     }
 };
-};
+}; // namespace doris::vectorized
diff --git a/be/src/vec/functions/function_rpc.h 
b/be/src/vec/functions/function_rpc.h
index 2c7535a..43bfe3a 100644
--- a/be/src/vec/functions/function_rpc.h
+++ b/be/src/vec/functions/function_rpc.h
@@ -15,6 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 
+#pragma once
+
 #include "vec/functions/function.h"
 
 namespace doris {
diff --git a/be/src/vec/functions/function_string_to_string.h 
b/be/src/vec/functions/function_string_to_string.h
index cd4ae44..78e81f7 100644
--- a/be/src/vec/functions/function_string_to_string.h
+++ b/be/src/vec/functions/function_string_to_string.h
@@ -18,6 +18,8 @@
 // 
https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionStringToString.h
 // and modified by Doris
 
+#pragma once
+
 #include "vec/columns/column_string.h"
 #include "vec/columns/column_vector.h"
 #include "vec/data_types/data_type_number.h"
diff --git a/be/src/vec/functions/simple_function_factory.h 
b/be/src/vec/functions/simple_function_factory.h
index 390418d..7ab9061 100644
--- a/be/src/vec/functions/simple_function_factory.h
+++ b/be/src/vec/functions/simple_function_factory.h
@@ -19,6 +19,7 @@
 // and modified by Doris
 
 #pragma once
+
 #include <mutex>
 #include <string>
 
@@ -71,6 +72,7 @@ void register_function_grouping(SimpleFunctionFactory& 
factory);
 void register_function_datetime_floor_ceil(SimpleFunctionFactory& factory);
 void register_function_convert_tz(SimpleFunctionFactory& factory);
 void register_function_least_greast(SimpleFunctionFactory& factory);
+void register_function_fake(SimpleFunctionFactory& factory);
 
 class SimpleFunctionFactory {
     using Creator = std::function<FunctionBuilderPtr()>;
@@ -191,6 +193,7 @@ public:
             register_function_datetime_floor_ceil(instance);
             register_function_convert_tz(instance);
             register_function_least_greast(instance);
+            register_function_fake(instance);
         });
         return instance;
     }
diff --git a/be/src/vec/utils/util.hpp b/be/src/vec/utils/util.hpp
index 04c5da7..6cf9a14 100644
--- a/be/src/vec/utils/util.hpp
+++ b/be/src/vec/utils/util.hpp
@@ -16,6 +16,7 @@
 // under the License.
 
 #pragma once
+
 #include <thrift/protocol/TJSONProtocol.h>
 
 #include <boost/shared_ptr.hpp>
@@ -23,6 +24,7 @@
 #include "runtime/descriptors.h"
 #include "vec/columns/column_nullable.h"
 #include "vec/core/block.h"
+#include "vec/exprs/vexpr.h"
 
 namespace doris::vectorized {
 class VectorizedUtils {
@@ -47,8 +49,9 @@ public:
         size_t size = dst.size();
         auto* __restrict l = dst.data();
         auto* __restrict r = src.data();
-        for (size_t i = 0; i < size; ++i)
+        for (size_t i = 0; i < size; ++i) {
             l[i] |= r[i];
+        }
     }
 
     static DataTypes get_data_types(const RowDescriptor& row_desc) {
@@ -60,7 +63,27 @@ public:
         }
         return data_types;
     }
+
+    static VExpr* dfs_peel_conjunct(VExpr* expr, int& leaf_index,
+                                    std::function<bool(int)> checker) {
+        static constexpr auto is_leaf = [](VExpr* expr) { return 
!expr->is_and_expr(); };
+
+        if (is_leaf(expr)) {
+            return checker(leaf_index++) ? nullptr : expr;
+        } else {
+            VExpr* left_child = dfs_peel_conjunct(expr->children()[0], 
leaf_index, checker);
+            VExpr* right_child = dfs_peel_conjunct(expr->children()[1], 
leaf_index, checker);
+
+            if (left_child != nullptr && right_child != nullptr) {
+                expr->set_children({left_child, right_child});
+                return expr;
+            }
+            // do not close the Expr* here for now
+            return left_child != nullptr ? left_child : right_child;
+        }
+    }
 };
+
 } // namespace doris::vectorized
 
 namespace apache::thrift {
@@ -76,4 +99,5 @@ ThriftStruct from_json_string(const std::string& json_val) {
     ts.read(&protocol);
     return ts;
 }
+
 } // namespace apache::thrift
diff --git a/gensrc/script/doris_builtins_functions.py 
b/gensrc/script/doris_builtins_functions.py
index 4d1f5d3..8bd6829 100755
--- a/gensrc/script/doris_builtins_functions.py
+++ b/gensrc/script/doris_builtins_functions.py
@@ -823,7 +823,7 @@ visible_functions = [
 
     [['esquery'], 'BOOLEAN', ['VARCHAR', 'VARCHAR'],
         '_ZN5doris11ESFunctions5matchEPN'
-        '9doris_udf15FunctionContextERKNS1_9StringValES6_', '', '', '', ''],
+        '9doris_udf15FunctionContextERKNS1_9StringValES6_', '', '', 'vec', ''],
 
     # String builtin functions
     [['substr', 'substring'], 'VARCHAR', ['VARCHAR', 'INT'],

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to