This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch dev-1.0.0 in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
commit a78bfecaaec7684a221d3b0593c9ffa94f31643e Author: Pxl <952130...@qq.com> AuthorDate: Sat Feb 26 17:02:54 2022 +0800 [improvement][vectorized] support es node predicate peel (#8174) --- be/src/exec/es/es_predicate.cpp | 2 +- be/src/exec/es/es_predicate.h | 4 +- be/src/exec/es/es_query_builder.cpp | 121 ++++++++++----------- be/src/exec/es/es_query_builder.h | 1 + be/src/exec/es_http_scan_node.cpp | 24 +++- be/src/exec/es_http_scan_node.h | 10 +- be/src/exec/olap_scan_node.cpp | 58 ++-------- be/src/exec/olap_scan_node.h | 14 +-- be/src/exec/scan_node.cpp | 28 +++++ be/src/exec/scan_node.h | 9 +- be/src/vec/CMakeLists.txt | 1 + be/src/vec/exec/ves_http_scanner.cpp | 8 +- be/src/vec/exec/ves_http_scanner.h | 24 ++-- be/src/vec/functions/function_convert_tz.h | 2 + .../function_date_or_datetime_computation.h | 2 + .../function_date_or_datetime_to_something.h | 2 + .../function_date_or_datetime_to_string.h | 2 + .../functions/function_datetime_string_to_string.h | 2 + .../{function_hash.h => function_fake.cpp} | 22 +--- ...{function_multi_same_args.h => function_fake.h} | 36 +++--- be/src/vec/functions/function_grouping.h | 10 +- be/src/vec/functions/function_hash.h | 1 + be/src/vec/functions/function_ifnull.h | 49 ++++----- be/src/vec/functions/function_multi_same_args.h | 4 +- be/src/vec/functions/function_rpc.h | 2 + be/src/vec/functions/function_string_to_string.h | 2 + be/src/vec/functions/simple_function_factory.h | 3 + be/src/vec/utils/util.hpp | 26 ++++- gensrc/script/doris_builtins_functions.py | 2 +- 29 files changed, 244 insertions(+), 227 deletions(-) diff --git a/be/src/exec/es/es_predicate.cpp b/be/src/exec/es/es_predicate.cpp index 3b8be66..7dab9ca 100644 --- a/be/src/exec/es/es_predicate.cpp +++ b/be/src/exec/es/es_predicate.cpp @@ -181,7 +181,7 @@ Status EsPredicate::build_disjuncts_list() { } // make sure to build by build_disjuncts_list -const std::vector<ExtPredicate*>& EsPredicate::get_predicate_list() { +const std::vector<ExtPredicate*>& EsPredicate::get_predicate_list() const { return _disjuncts; } diff --git a/be/src/exec/es/es_predicate.h b/be/src/exec/es/es_predicate.h index 825bc5e..826af0a 100644 --- a/be/src/exec/es/es_predicate.h +++ b/be/src/exec/es/es_predicate.h @@ -141,10 +141,10 @@ class EsPredicate { public: EsPredicate(ExprContext* context, const TupleDescriptor* tuple_desc, ObjectPool* pool); ~EsPredicate(); - const std::vector<ExtPredicate*>& get_predicate_list(); + const std::vector<ExtPredicate*>& get_predicate_list() const; Status build_disjuncts_list(); // public for tests - EsPredicate(const std::vector<ExtPredicate*>& all_predicates) { _disjuncts = all_predicates; }; + EsPredicate(const std::vector<ExtPredicate*>& all_predicates) { _disjuncts = all_predicates; } Status get_es_query_status() { return _es_query_status; } diff --git a/be/src/exec/es/es_query_builder.cpp b/be/src/exec/es/es_query_builder.cpp index 5f8dcfd..1f9488e 100644 --- a/be/src/exec/es/es_query_builder.cpp +++ b/be/src/exec/es/es_query_builder.cpp @@ -54,19 +54,19 @@ TermQueryBuilder::TermQueryBuilder(const std::string& field, const std::string& TermQueryBuilder::TermQueryBuilder(const ExtBinaryPredicate& binary_predicate) : _field(binary_predicate.col.name), _match_none(false) { - if (binary_predicate.col.type.type == PrimitiveType::TYPE_BOOLEAN) { - int val = atoi(binary_predicate.value.to_string().c_str()); - if (val == 1) { - _term = std::string("true"); - } else if (val == 0){ - _term = std::string("false"); - } else { - // keep semantic consistent with mysql - _match_none = true; - } + if (binary_predicate.col.type.type == PrimitiveType::TYPE_BOOLEAN) { + int val = atoi(binary_predicate.value.to_string().c_str()); + if (val == 1) { + _term = std::string("true"); + } else if (val == 0) { + _term = std::string("false"); } else { - _term = binary_predicate.value.to_string(); + // keep semantic consistent with mysql + _match_none = true; } + } else { + _term = binary_predicate.value.to_string(); + } } void TermQueryBuilder::to_json(rapidjson::Document* document, rapidjson::Value* query) { @@ -82,7 +82,6 @@ void TermQueryBuilder::to_json(rapidjson::Document* document, rapidjson::Value* // this would only appear `bool` column's predicate (a = 2) query->AddMember("match_none", term_node, allocator); } - } RangeQueryBuilder::RangeQueryBuilder(const ExtBinaryPredicate& range_predicate) @@ -381,66 +380,62 @@ Status BooleanQueryBuilder::check_es_query(const ExtFunction& extFunction) { void BooleanQueryBuilder::validate(const std::vector<EsPredicate*>& espredicates, std::vector<bool>* result) { - int conjunct_size = espredicates.size(); - result->reserve(conjunct_size); for (auto espredicate : espredicates) { - bool flag = true; - for (auto predicate : espredicate->get_predicate_list()) { - switch (predicate->node_type) { - case TExprNodeType::BINARY_PRED: { - ExtBinaryPredicate* binary_predicate = (ExtBinaryPredicate*)predicate; - TExprOpcode::type op = binary_predicate->op; - if (op != TExprOpcode::EQ && op != TExprOpcode::NE && op != TExprOpcode::LT && - op != TExprOpcode::LE && op != TExprOpcode::GT && op != TExprOpcode::GE) { - flag = false; - } - break; - } - case TExprNodeType::COMPOUND_PRED: { - ExtCompPredicates* compound_predicates = (ExtCompPredicates*)predicate; - if (compound_predicates->op == TExprOpcode::COMPOUND_AND) { - std::vector<bool> list; - validate(compound_predicates->conjuncts, &list); - for (int i = list.size() - 1; i >= 0; i--) { - if (!list[i]) { - flag = false; - break; - } - } - } else { - // reserved for compound_not - flag = false; - } - break; + result->push_back(validate(espredicate)); + } +} + +bool BooleanQueryBuilder::validate(const EsPredicate* espredicate) { + for (auto predicate : espredicate->get_predicate_list()) { + switch (predicate->node_type) { + case TExprNodeType::BINARY_PRED: { + ExtBinaryPredicate* binary_predicate = (ExtBinaryPredicate*)predicate; + TExprOpcode::type op = binary_predicate->op; + if (op != TExprOpcode::EQ && op != TExprOpcode::NE && op != TExprOpcode::LT && + op != TExprOpcode::LE && op != TExprOpcode::GT && op != TExprOpcode::GE) { + return false; } - case TExprNodeType::LIKE_PRED: - case TExprNodeType::IS_NULL_PRED: - case TExprNodeType::IN_PRED: { - break; + break; + } + case TExprNodeType::COMPOUND_PRED: { + ExtCompPredicates* compound_predicates = (ExtCompPredicates*)predicate; + if (compound_predicates->op != TExprOpcode::COMPOUND_AND) { + // reserved for compound_not + return false; } - case TExprNodeType::FUNCTION_CALL: { - ExtFunction* function_predicate = (ExtFunction*)predicate; - if ("esquery" == function_predicate->func_name) { - Status st = check_es_query(*function_predicate); - if (!st.ok()) { - flag = false; - } - } else { - flag = false; + std::vector<bool> list; + validate(compound_predicates->conjuncts, &list); + for (int i = list.size() - 1; i >= 0; i--) { + if (!list[i]) { + return false; } - break; - } - default: { - flag = false; - break; } + break; + } + case TExprNodeType::LIKE_PRED: + case TExprNodeType::IS_NULL_PRED: + case TExprNodeType::IN_PRED: { + break; + } + case TExprNodeType::FUNCTION_CALL: { + ExtFunction* function_predicate = (ExtFunction*)predicate; + if ("esquery" != function_predicate->func_name) { + return false; } - if (!flag) { - break; + Status st = check_es_query(*function_predicate); + if (!st.ok()) { + return false; } + break; + } + default: { + return false; + break; + } } - result->push_back(flag); } + + return true; } void BooleanQueryBuilder::to_query(const std::vector<EsPredicate*>& predicates, diff --git a/be/src/exec/es/es_query_builder.h b/be/src/exec/es/es_query_builder.h index c471380..6a5d16d 100644 --- a/be/src/exec/es/es_query_builder.h +++ b/be/src/exec/es/es_query_builder.h @@ -119,6 +119,7 @@ public: static Status check_es_query(const ExtFunction& extFunction); // decide which predicate can process static void validate(const std::vector<EsPredicate*>& espredicates, std::vector<bool>* result); + static bool validate(const EsPredicate* espredicate); private: // add child query diff --git a/be/src/exec/es_http_scan_node.cpp b/be/src/exec/es_http_scan_node.cpp index 7b67486..64b9792 100644 --- a/be/src/exec/es_http_scan_node.cpp +++ b/be/src/exec/es_http_scan_node.cpp @@ -68,6 +68,9 @@ Status EsHttpScanNode::prepare(RuntimeState* state) { VLOG_QUERY << "EsHttpScanNode prepare"; RETURN_IF_ERROR(ScanNode::prepare(state)); + _scanner_profile.reset(new RuntimeProfile("EsHttpScanNode")); + runtime_profile()->add_child(_scanner_profile.get(), true, nullptr); + _runtime_state = state; _tuple_desc = state->desc_tbl().get_tuple_descriptor(_tuple_id); if (_tuple_desc == nullptr) { @@ -92,14 +95,20 @@ Status EsHttpScanNode::prepare(RuntimeState* state) { // build predicate Status EsHttpScanNode::build_conjuncts_list() { Status status = Status::OK(); + _conjunct_to_predicate.resize(_conjunct_ctxs.size()); + for (int i = 0; i < _conjunct_ctxs.size(); ++i) { EsPredicate* predicate = _pool->add(new EsPredicate(_conjunct_ctxs[i], _tuple_desc, _pool)); predicate->set_field_context(_fields_context); status = predicate->build_disjuncts_list(); if (status.ok()) { - _predicates.push_back(predicate); + _conjunct_to_predicate[i] = _predicate_to_conjunct.size(); _predicate_to_conjunct.push_back(i); + + _predicates.push_back(predicate); } else { + _conjunct_to_predicate[i] = -1; + VLOG_CRITICAL << status.get_error_msg(); status = predicate->get_es_query_status(); if (!status.ok()) { @@ -133,6 +142,7 @@ Status EsHttpScanNode::open(RuntimeState* state) { // remove those predicates which ES cannot support std::vector<bool> list; BooleanQueryBuilder::validate(_predicates, &list); + DCHECK(list.size() == _predicate_to_conjunct.size()); for (int i = list.size() - 1; i >= 0; i--) { if (!list[i]) { @@ -148,6 +158,12 @@ Status EsHttpScanNode::open(RuntimeState* state) { _conjunct_ctxs.erase(_conjunct_ctxs.begin() + conjunct_index); } + auto checker = [&](int index) { + return _conjunct_to_predicate[index] != -1 && list[_conjunct_to_predicate[index]]; + }; + std::string vconjunct_information = _peel_pushed_vconjunct(checker); + _scanner_profile->add_info_string("VconjunctExprTree", vconjunct_information); + RETURN_IF_ERROR(start_scanners()); return Status::OK(); @@ -443,9 +459,9 @@ void EsHttpScanNode::scanner_worker(int start_idx, int length, std::promise<Stat scanner_expr_ctxs, &counter, doc_value_mode)); status = scanner_scan(std::move(scanner), scanner_expr_ctxs, &counter); } else { - std::unique_ptr<VEsHttpScanner> scanner( - new VEsHttpScanner(_runtime_state, runtime_profile(), _tuple_id, properties, - scanner_expr_ctxs, &counter, doc_value_mode)); + std::unique_ptr<vectorized::VEsHttpScanner> scanner(new vectorized::VEsHttpScanner( + _runtime_state, runtime_profile(), _tuple_id, properties, scanner_expr_ctxs, + &counter, doc_value_mode)); status = scanner_scan(std::move(scanner)); } if (!status.ok()) { diff --git a/be/src/exec/es_http_scan_node.h b/be/src/exec/es_http_scan_node.h index 2564294..0fc9fac 100644 --- a/be/src/exec/es_http_scan_node.h +++ b/be/src/exec/es_http_scan_node.h @@ -15,8 +15,7 @@ // specific language governing permissions and limitations // under the License. -#ifndef BE_EXEC_ES_HTTP_SCAN_NODE_H -#define BE_EXEC_ES_HTTP_SCAN_NODE_H +#pragma once #include <atomic> #include <condition_variable> @@ -99,7 +98,7 @@ private: Status scanner_scan(std::unique_ptr<EsHttpScanner> scanner, const std::vector<ExprContext*>& conjunct_ctxs, EsScanCounter* counter); - virtual Status scanner_scan(std::unique_ptr<VEsHttpScanner> scanner) { + virtual Status scanner_scan(std::unique_ptr<vectorized::VEsHttpScanner> scanner) { return Status::NotSupported("vectorized scan in EsHttpScanNode is not supported!"); }; @@ -117,8 +116,9 @@ private: std::vector<EsPredicate*> _predicates; std::vector<int> _predicate_to_conjunct; + std::vector<int> _conjunct_to_predicate; + + std::unique_ptr<RuntimeProfile> _scanner_profile; }; } // namespace doris - -#endif diff --git a/be/src/exec/olap_scan_node.cpp b/be/src/exec/olap_scan_node.cpp index f1f0169..af26ae1 100644 --- a/be/src/exec/olap_scan_node.cpp +++ b/be/src/exec/olap_scan_node.cpp @@ -83,10 +83,8 @@ Status OlapScanNode::init(const TPlanNode& tnode, RuntimeState* state) { for (int i = 0; i < filter_size; ++i) { IRuntimeFilter* runtime_filter = nullptr; const auto& filter_desc = _runtime_filter_descs[i]; - RETURN_IF_ERROR(state->runtime_filter_mgr()->regist_filter(RuntimeFilterRole::CONSUMER, - filter_desc, - state->query_options(), - id())); + RETURN_IF_ERROR(state->runtime_filter_mgr()->regist_filter( + RuntimeFilterRole::CONSUMER, filter_desc, state->query_options(), id())); RETURN_IF_ERROR(state->runtime_filter_mgr()->get_consume_filter(filter_desc.filter_id, &runtime_filter)); @@ -162,7 +160,7 @@ void OlapScanNode::_init_counter(RuntimeState* state) { _olap_wait_batch_queue_timer = ADD_TIMER(_runtime_profile, "BatchQueueWaitTime"); // for the purpose of debugging or profiling - for (int i = 0; i < sizeof(_general_debug_timer)/sizeof(*_general_debug_timer); ++i) { + for (int i = 0; i < GENERAL_DEBUG_COUNT; ++i) { char name[64]; snprintf(name, sizeof(name), "GeneralDebugTimer%d", i); _general_debug_timer[i] = ADD_TIMER(_segment_profile, name); @@ -529,6 +527,7 @@ void OlapScanNode::remove_pushed_conjuncts(RuntimeState* state) { iter->second->runtimefilter->set_push_down_profile(); } } + // set vconjunct_ctx is empty, if all conjunct if (_direct_conjunct_size == 0) { if (_vconjunct_ctx_ptr.get() != nullptr) { @@ -536,8 +535,11 @@ void OlapScanNode::remove_pushed_conjuncts(RuntimeState* state) { _vconjunct_ctx_ptr = nullptr; } } + // filter idle conjunct in vexpr_contexts - _peel_pushed_conjuncts(); + auto checker = [&](int index) { return _pushed_conjuncts_index.count(index); }; + std::string vconjunct_information = _peel_pushed_vconjunct(checker); + _scanner_profile->add_info_string("VconjunctExprTree", vconjunct_information); } void OlapScanNode::eval_const_conjuncts() { @@ -1679,48 +1681,4 @@ Status OlapScanNode::add_one_batch(RowBatch* row_batch) { _row_batch_added_cv.notify_one(); return Status::OK(); } - - -vectorized::VExpr* OlapScanNode::_dfs_peel_conjunct(vectorized::VExpr* expr, int& leaf_index) { - static constexpr auto is_leaf = [](vectorized::VExpr* expr) { return !expr->is_and_expr(); }; - - if (is_leaf(expr)) { - return _pushed_conjuncts_index.count(leaf_index++) ? nullptr : expr; - } else { - vectorized::VExpr* left_child = _dfs_peel_conjunct(expr->children()[0], leaf_index); - vectorized::VExpr* right_child = _dfs_peel_conjunct(expr->children()[1], leaf_index); - - if (left_child != nullptr && right_child != nullptr) { - expr->set_children({left_child, right_child}); - return expr; - } - // here do not close Expr* now - return left_child != nullptr ? left_child : right_child; - } -} - -// This function is used to remove pushed expr in expr tree. -// It relies on the logic of function convertConjunctsToAndCompoundPredicate() of FE splicing expr. -// It requires FE to satisfy each splicing with 'and' expr, and spliced from left to right, in order. -// Expr tree specific forms do not require requirements. -void OlapScanNode::_peel_pushed_conjuncts() { - if (_vconjunct_ctx_ptr.get() == nullptr) return; - - int leaf_index = 0; - vectorized::VExpr* conjunct_expr_root = (*_vconjunct_ctx_ptr.get())->root(); - - if (conjunct_expr_root != nullptr) { - vectorized::VExpr* new_conjunct_expr_root = - _dfs_peel_conjunct(conjunct_expr_root, leaf_index); - if (new_conjunct_expr_root == nullptr) { - _vconjunct_ctx_ptr = nullptr; - _scanner_profile->add_info_string("VconjunctExprTree", "null"); - } else { - (*_vconjunct_ctx_ptr.get())->set_root(new_conjunct_expr_root); - _scanner_profile->add_info_string("VconjunctExprTree", - new_conjunct_expr_root->debug_string()); - } - } -} - } // namespace doris diff --git a/be/src/exec/olap_scan_node.h b/be/src/exec/olap_scan_node.h index 82e98d5..5ebd647 100644 --- a/be/src/exec/olap_scan_node.h +++ b/be/src/exec/olap_scan_node.h @@ -15,8 +15,7 @@ // specific language governing permissions and limitations // under the License. -#ifndef DORIS_BE_SRC_QUERY_EXEC_OLAP_SCAN_NODE_H -#define DORIS_BE_SRC_QUERY_EXEC_OLAP_SCAN_NODE_H +#pragma once #include <atomic> #include <condition_variable> @@ -57,7 +56,7 @@ public: Status get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) override; Status collect_query_statistics(QueryStatistics* statistics) override; Status close(RuntimeState* state) override; - Status set_scan_ranges(const std::vector <TScanRangeParams>& scan_ranges) override; + Status set_scan_ranges(const std::vector<TScanRangeParams>& scan_ranges) override; inline void set_no_agg_finalize() { _need_agg_finalize = false; } protected: @@ -137,7 +136,9 @@ protected: // Write debug string of this into out. void debug_string(int indentation_level, std::stringstream* out) const override {} - const std::vector<TRuntimeFilterDesc>& runtime_filter_descs() const { return _runtime_filter_descs; } + const std::vector<TRuntimeFilterDesc>& runtime_filter_descs() const { + return _runtime_filter_descs; + } void _init_counter(RuntimeState* state); // OLAP_SCAN_NODE profile layering: OLAP_SCAN_NODE, OlapScanner, and SegmentIterator @@ -328,11 +329,6 @@ protected: // for debugging or profiling, record any info as you want RuntimeProfile::Counter* _general_debug_timer[GENERAL_DEBUG_COUNT] = {}; - - vectorized::VExpr* _dfs_peel_conjunct(vectorized::VExpr* expr, int& leaf_index); - void _peel_pushed_conjuncts(); // remove pushed expr from conjunct tree }; } // namespace doris - -#endif diff --git a/be/src/exec/scan_node.cpp b/be/src/exec/scan_node.cpp index f292ed3..9f3e9ca 100644 --- a/be/src/exec/scan_node.cpp +++ b/be/src/exec/scan_node.cpp @@ -17,6 +17,8 @@ #include "exec/scan_node.h" +#include "vec/utils/util.hpp" + namespace doris { const std::string ScanNode::_s_bytes_read_counter = "BytesRead"; @@ -40,4 +42,30 @@ Status ScanNode::prepare(RuntimeState* state) { return Status::OK(); } +// This function is used to remove pushed expr in expr tree. +// It relies on the logic of function convertConjunctsToAndCompoundPredicate() of FE splicing expr. +// It requires FE to satisfy each splicing with 'and' expr, and spliced from left to right, in order. +// Expr tree specific forms do not require requirements. +std::string ScanNode::_peel_pushed_vconjunct(const std::function<bool(int)>& checker) { + if (_vconjunct_ctx_ptr.get() == nullptr) { + return "null"; + } + + int leaf_index = 0; + vectorized::VExpr* conjunct_expr_root = (*_vconjunct_ctx_ptr.get())->root(); + + if (conjunct_expr_root != nullptr) { + vectorized::VExpr* new_conjunct_expr_root = vectorized::VectorizedUtils::dfs_peel_conjunct( + conjunct_expr_root, leaf_index, checker); + if (new_conjunct_expr_root == nullptr) { + _vconjunct_ctx_ptr = nullptr; + } else { + (*_vconjunct_ctx_ptr.get())->set_root(new_conjunct_expr_root); + return new_conjunct_expr_root->debug_string(); + } + } + + return "null"; +} + } // namespace doris diff --git a/be/src/exec/scan_node.h b/be/src/exec/scan_node.h index bd09991..9808b87 100644 --- a/be/src/exec/scan_node.h +++ b/be/src/exec/scan_node.h @@ -15,14 +15,14 @@ // specific language governing permissions and limitations // under the License. -#ifndef DORIS_BE_SRC_QUERY_EXEC_SCAN_NODE_H -#define DORIS_BE_SRC_QUERY_EXEC_SCAN_NODE_H +#pragma once #include <string> #include "exec/exec_node.h" #include "gen_cpp/PaloInternalService_types.h" #include "util/runtime_profile.h" +#include "vec/exprs/vexpr.h" namespace doris { @@ -90,6 +90,9 @@ public: static const std::string _s_num_disks_accessed_counter; protected: + std::string _peel_pushed_vconjunct( + const std::function<bool(int)>& checker); // remove pushed expr from conjunct tree + RuntimeProfile::Counter* _bytes_read_counter; // # bytes read from the scanner // # rows/tuples read from the scanner (including those discarded by eval_conjuncts()) RuntimeProfile::Counter* _rows_read_counter; @@ -99,5 +102,3 @@ protected: }; } // namespace doris - -#endif diff --git a/be/src/vec/CMakeLists.txt b/be/src/vec/CMakeLists.txt index d9ec309..91f65f9 100644 --- a/be/src/vec/CMakeLists.txt +++ b/be/src/vec/CMakeLists.txt @@ -151,6 +151,7 @@ set(VEC_FILES functions/function_rpc.cpp functions/function_convert_tz.cpp functions/least_greast.cpp + functions/function_fake.cpp olap/vgeneric_iterators.cpp olap/vcollect_iterator.cpp olap/block_reader.cpp diff --git a/be/src/vec/exec/ves_http_scanner.cpp b/be/src/vec/exec/ves_http_scanner.cpp index 6087257..c06da06 100644 --- a/be/src/vec/exec/ves_http_scanner.cpp +++ b/be/src/vec/exec/ves_http_scanner.cpp @@ -17,7 +17,7 @@ #include "vec/exec/ves_http_scanner.h" -namespace doris { +namespace doris::vectorized { VEsHttpScanner::~VEsHttpScanner() { close(); @@ -43,8 +43,8 @@ Status VEsHttpScanner::get_next(std::vector<vectorized::MutableColumnPtr>& colum COUNTER_UPDATE(_rows_read_counter, 1); SCOPED_TIMER(_materialize_timer); - RETURN_IF_ERROR(_es_scroll_parser->fill_columns(_tuple_desc, columns, tuple_pool, &_line_eof, - docvalue_context)); + RETURN_IF_ERROR(_es_scroll_parser->fill_columns(_tuple_desc, columns, tuple_pool, + &_line_eof, docvalue_context)); if (!_line_eof) { break; } @@ -53,4 +53,4 @@ Status VEsHttpScanner::get_next(std::vector<vectorized::MutableColumnPtr>& colum return Status::OK(); } -} // namespace doris +} // namespace doris::vectorized diff --git a/be/src/vec/exec/ves_http_scanner.h b/be/src/vec/exec/ves_http_scanner.h index 3692a61..5fa83db 100644 --- a/be/src/vec/exec/ves_http_scanner.h +++ b/be/src/vec/exec/ves_http_scanner.h @@ -16,28 +16,24 @@ // specific language governing permissions and limitations // under the License. -#ifndef BE_EXEC_VES_HTTP_SCANNER_H -#define BE_EXEC_VES_HTTP_SCANNER_H +#pragma once #include <exec/es_http_scanner.h> -namespace doris { +namespace doris::vectorized { class VEsHttpScanner : public EsHttpScanner { public: VEsHttpScanner(RuntimeState* state, RuntimeProfile* profile, TupleId tuple_id, - const std::map<std::string, std::string>& properties, - const std::vector<ExprContext*>& conjunct_ctxs, EsScanCounter* counter, - bool doc_value_mode): EsHttpScanner(state, profile, tuple_id, properties, - conjunct_ctxs, counter, doc_value_mode) {}; + const std::map<std::string, std::string>& properties, + const std::vector<ExprContext*>& conjunct_ctxs, EsScanCounter* counter, + bool doc_value_mode) + : EsHttpScanner(state, profile, tuple_id, properties, conjunct_ctxs, counter, + doc_value_mode) {}; ~VEsHttpScanner(); - Status get_next(std::vector<vectorized::MutableColumnPtr>& columns, - MemPool* tuple_pool, bool* eof, - const std::map<std::string, std::string>& docvalue_context); - + Status get_next(std::vector<vectorized::MutableColumnPtr>& columns, MemPool* tuple_pool, + bool* eof, const std::map<std::string, std::string>& docvalue_context); }; -} // namespace doris - -#endif +} // namespace doris::vectorized diff --git a/be/src/vec/functions/function_convert_tz.h b/be/src/vec/functions/function_convert_tz.h index 7af61b3..f96031c 100644 --- a/be/src/vec/functions/function_convert_tz.h +++ b/be/src/vec/functions/function_convert_tz.h @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +#pragma once + #include "vec/columns/columns_number.h" #include "vec/common/string_ref.h" #include "vec/core/types.h" diff --git a/be/src/vec/functions/function_date_or_datetime_computation.h b/be/src/vec/functions/function_date_or_datetime_computation.h index 7fdffef..5a106fd 100644 --- a/be/src/vec/functions/function_date_or_datetime_computation.h +++ b/be/src/vec/functions/function_date_or_datetime_computation.h @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +#pragma once + #include "common/logging.h" #include "fmt/format.h" #include "runtime/datetime_value.h" diff --git a/be/src/vec/functions/function_date_or_datetime_to_something.h b/be/src/vec/functions/function_date_or_datetime_to_something.h index efef7c3..1440126 100644 --- a/be/src/vec/functions/function_date_or_datetime_to_something.h +++ b/be/src/vec/functions/function_date_or_datetime_to_something.h @@ -18,6 +18,8 @@ // https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionDateOrDatetimeToSomething.h // and modified by Doris +#pragma once + #include "vec/data_types/data_type_date.h" #include "vec/data_types/data_type_date_time.h" #include "vec/functions/date_time_transforms.h" diff --git a/be/src/vec/functions/function_date_or_datetime_to_string.h b/be/src/vec/functions/function_date_or_datetime_to_string.h index 42922c8..15cc226 100644 --- a/be/src/vec/functions/function_date_or_datetime_to_string.h +++ b/be/src/vec/functions/function_date_or_datetime_to_string.h @@ -18,6 +18,8 @@ // https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionDateOrDatetimeToString.h // and modified by Doris +#pragma once + #include "vec/data_types/data_type_date.h" #include "vec/data_types/data_type_date_time.h" #include "vec/data_types/data_type_string.h" diff --git a/be/src/vec/functions/function_datetime_string_to_string.h b/be/src/vec/functions/function_datetime_string_to_string.h index b63bfcd..6502d0f 100644 --- a/be/src/vec/functions/function_datetime_string_to_string.h +++ b/be/src/vec/functions/function_datetime_string_to_string.h @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +#pragma once + #include "vec/columns/column_nullable.h" #include "vec/columns/columns_number.h" #include "vec/data_types/data_type_date.h" diff --git a/be/src/vec/functions/function_hash.h b/be/src/vec/functions/function_fake.cpp similarity index 58% copy from be/src/vec/functions/function_hash.h copy to be/src/vec/functions/function_fake.cpp index a908c36..0aa9bf0 100644 --- a/be/src/vec/functions/function_hash.h +++ b/be/src/vec/functions/function_fake.cpp @@ -14,27 +14,13 @@ // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. -// This file is copied from -// https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionHash.h -// and modified by Doris -#pragma once -#include <utility> - -#include "vec/columns/column_string.h" -#include "vec/columns/column_vector.h" -#include "vec/common/bit_cast.h" -#include "vec/common/hash_table/hash.h" -#include "vec/data_types/data_type.h" -#include "vec/data_types/data_type_number.h" -#include "vec/functions/function.h" -#include "vec/functions/function_helpers.h" +#include "vec/functions/function_fake.h" namespace doris::vectorized { -struct IntHash64Impl { - using ReturnType = UInt64; +void register_function_fake(SimpleFunctionFactory& factory) { + factory.register_function<FunctionFake<FunctionEsqueryImpl>>(); +} - static UInt64 apply(UInt64 x) { return int_hash64(x ^ 0x4CF2D2BAAE6DA887ULL); } -}; } // namespace doris::vectorized diff --git a/be/src/vec/functions/function_multi_same_args.h b/be/src/vec/functions/function_fake.h similarity index 72% copy from be/src/vec/functions/function_multi_same_args.h copy to be/src/vec/functions/function_fake.h index 056544e..519fb08 100644 --- a/be/src/vec/functions/function_multi_same_args.h +++ b/be/src/vec/functions/function_fake.h @@ -15,41 +15,47 @@ // specific language governing permissions and limitations // under the License. -#include "udf/udf.h" -#include "vec/data_types/get_least_supertype.h" -#include "vec/functions/function_helpers.h" +#pragma once + +#include "common/status.h" +#include "vec/core/types.h" +#include "vec/data_types/data_type_number.h" #include "vec/functions/simple_function_factory.h" -#include "vec/utils/template_helpers.hpp" #include "vec/utils/util.hpp" namespace doris::vectorized { +struct FunctionEsqueryImpl { + static constexpr auto name = "esquery"; + static DataTypePtr get_return_type_impl(const DataTypes& arguments) { + return std::make_shared<DataTypeUInt8>(); + } +}; + +//FunctionFake is use for some function call expr only work at prepare/open phase, do not support execute(). template <typename Impl> -class FunctionMultiSameArgs : public IFunction { +class FunctionFake : public IFunction { public: static constexpr auto name = Impl::name; - static FunctionPtr create() { return std::make_shared<FunctionMultiSameArgs>(); } + static FunctionPtr create() { return std::make_shared<FunctionFake>(); } String get_name() const override { return name; } - bool use_default_implementation_for_constants() const override { return true; } - - bool use_default_implementation_for_nulls() const override { return true; } + size_t get_number_of_arguments() const override { return 0; } bool is_variadic() const override { return true; } - size_t get_number_of_arguments() const override { return 0; } - DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { return Impl::get_return_type_impl(arguments); } + bool use_default_implementation_for_nulls() const override { return true; } + Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, size_t result, size_t input_rows_count) override { - DCHECK_GE(arguments.size(), 1); - block.replace_by_position(result, Impl::execute(block, arguments, input_rows_count)); - return Status::OK(); + return Status::NotSupported(fmt::format("Fake function {} do not support execute", name)); } }; -}; + +} // namespace doris::vectorized diff --git a/be/src/vec/functions/function_grouping.h b/be/src/vec/functions/function_grouping.h index f8ea725..01cfe49 100644 --- a/be/src/vec/functions/function_grouping.h +++ b/be/src/vec/functions/function_grouping.h @@ -15,14 +15,13 @@ // specific language governing permissions and limitations // under the License. -#ifndef DORIS_FUNCTION_GROUPING_H -#define DORIS_FUNCTION_GROUPING_H +#pragma once -#include "vec/functions/simple_function_factory.h" #include "vec/columns/column_nullable.h" +#include "vec/data_types/get_least_supertype.h" #include "vec/functions/function_helpers.h" +#include "vec/functions/simple_function_factory.h" #include "vec/utils/util.hpp" -#include "vec/data_types/get_least_supertype.h" namespace doris::vectorized { @@ -64,5 +63,4 @@ public: String get_name() const override { return name; } }; -} -#endif //DORIS_FUNCTION_GROUPING_H +} // namespace doris::vectorized diff --git a/be/src/vec/functions/function_hash.h b/be/src/vec/functions/function_hash.h index a908c36..7ea034b 100644 --- a/be/src/vec/functions/function_hash.h +++ b/be/src/vec/functions/function_hash.h @@ -19,6 +19,7 @@ // and modified by Doris #pragma once + #include <utility> #include "vec/columns/column_string.h" diff --git a/be/src/vec/functions/function_ifnull.h b/be/src/vec/functions/function_ifnull.h index 26fe18c..b662764 100644 --- a/be/src/vec/functions/function_ifnull.h +++ b/be/src/vec/functions/function_ifnull.h @@ -18,15 +18,14 @@ // https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/Ifnull.h // and modified by Doris -#ifndef DORIS_FUNCTION_IFNULL_H -#define DORIS_FUNCTION_IFNULL_H +#pragma once -#include "vec/functions/simple_function_factory.h" #include "vec/columns/column_nullable.h" +#include "vec/data_types/get_least_supertype.h" #include "vec/functions/function_helpers.h" -#include "vec/utils/util.hpp" #include "vec/functions/function_string.h" -#include "vec/data_types/get_least_supertype.h" +#include "vec/functions/simple_function_factory.h" +#include "vec/utils/util.hpp" namespace doris::vectorized { class FunctionIfNull : public IFunction { @@ -59,43 +58,35 @@ public: return Status::OK(); } - ColumnWithTypeAndName null_column_arg0 { - nullptr, std::make_shared<DataTypeUInt8>(),"" - }; - ColumnWithTypeAndName nested_column_arg0 { - nullptr, col_left.type, "" - }; + ColumnWithTypeAndName null_column_arg0 {nullptr, std::make_shared<DataTypeUInt8>(), ""}; + ColumnWithTypeAndName nested_column_arg0 {nullptr, col_left.type, ""}; /// implement isnull(col_left) logic if (auto* nullable = check_and_get_column<ColumnNullable>(*col_left.column)) { null_column_arg0.column = nullable->get_null_map_column_ptr(); nested_column_arg0.column = nullable->get_nested_column_ptr(); - nested_column_arg0.type = reinterpret_cast<const DataTypeNullable*>( - nested_column_arg0.type.get())->get_nested_type(); + nested_column_arg0.type = + reinterpret_cast<const DataTypeNullable*>(nested_column_arg0.type.get()) + ->get_nested_type(); } else { block.get_by_position(result).column = col_left.column; return Status::OK(); } - const ColumnsWithTypeAndName if_columns - { - null_column_arg0, - block.get_by_position(arguments[1]), - nested_column_arg0 - }; + const ColumnsWithTypeAndName if_columns { + null_column_arg0, block.get_by_position(arguments[1]), nested_column_arg0}; - Block temporary_block( - { - null_column_arg0, - block.get_by_position(arguments[1]), - nested_column_arg0, - block.get_by_position(result), - }); + Block temporary_block({ + null_column_arg0, + block.get_by_position(arguments[1]), + nested_column_arg0, + block.get_by_position(result), + }); - auto func_if = SimpleFunctionFactory::instance().get_function("if", if_columns, block.get_by_position(result).type); + auto func_if = SimpleFunctionFactory::instance().get_function( + "if", if_columns, block.get_by_position(result).type); func_if->execute(context, temporary_block, {0, 1, 2}, 3, input_rows_count); block.get_by_position(result).column = temporary_block.get_by_position(3).column; return Status::OK(); } }; -} -#endif //DORIS_FUNCTION_IFNULL_H +} // namespace doris::vectorized diff --git a/be/src/vec/functions/function_multi_same_args.h b/be/src/vec/functions/function_multi_same_args.h index 056544e..95ecd1d 100644 --- a/be/src/vec/functions/function_multi_same_args.h +++ b/be/src/vec/functions/function_multi_same_args.h @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +#pragma once + #include "udf/udf.h" #include "vec/data_types/get_least_supertype.h" #include "vec/functions/function_helpers.h" @@ -52,4 +54,4 @@ public: return Status::OK(); } }; -}; +}; // namespace doris::vectorized diff --git a/be/src/vec/functions/function_rpc.h b/be/src/vec/functions/function_rpc.h index 2c7535a..43bfe3a 100644 --- a/be/src/vec/functions/function_rpc.h +++ b/be/src/vec/functions/function_rpc.h @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +#pragma once + #include "vec/functions/function.h" namespace doris { diff --git a/be/src/vec/functions/function_string_to_string.h b/be/src/vec/functions/function_string_to_string.h index cd4ae44..78e81f7 100644 --- a/be/src/vec/functions/function_string_to_string.h +++ b/be/src/vec/functions/function_string_to_string.h @@ -18,6 +18,8 @@ // https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionStringToString.h // and modified by Doris +#pragma once + #include "vec/columns/column_string.h" #include "vec/columns/column_vector.h" #include "vec/data_types/data_type_number.h" diff --git a/be/src/vec/functions/simple_function_factory.h b/be/src/vec/functions/simple_function_factory.h index 390418d..7ab9061 100644 --- a/be/src/vec/functions/simple_function_factory.h +++ b/be/src/vec/functions/simple_function_factory.h @@ -19,6 +19,7 @@ // and modified by Doris #pragma once + #include <mutex> #include <string> @@ -71,6 +72,7 @@ void register_function_grouping(SimpleFunctionFactory& factory); void register_function_datetime_floor_ceil(SimpleFunctionFactory& factory); void register_function_convert_tz(SimpleFunctionFactory& factory); void register_function_least_greast(SimpleFunctionFactory& factory); +void register_function_fake(SimpleFunctionFactory& factory); class SimpleFunctionFactory { using Creator = std::function<FunctionBuilderPtr()>; @@ -191,6 +193,7 @@ public: register_function_datetime_floor_ceil(instance); register_function_convert_tz(instance); register_function_least_greast(instance); + register_function_fake(instance); }); return instance; } diff --git a/be/src/vec/utils/util.hpp b/be/src/vec/utils/util.hpp index 04c5da7..6cf9a14 100644 --- a/be/src/vec/utils/util.hpp +++ b/be/src/vec/utils/util.hpp @@ -16,6 +16,7 @@ // under the License. #pragma once + #include <thrift/protocol/TJSONProtocol.h> #include <boost/shared_ptr.hpp> @@ -23,6 +24,7 @@ #include "runtime/descriptors.h" #include "vec/columns/column_nullable.h" #include "vec/core/block.h" +#include "vec/exprs/vexpr.h" namespace doris::vectorized { class VectorizedUtils { @@ -47,8 +49,9 @@ public: size_t size = dst.size(); auto* __restrict l = dst.data(); auto* __restrict r = src.data(); - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < size; ++i) { l[i] |= r[i]; + } } static DataTypes get_data_types(const RowDescriptor& row_desc) { @@ -60,7 +63,27 @@ public: } return data_types; } + + static VExpr* dfs_peel_conjunct(VExpr* expr, int& leaf_index, + std::function<bool(int)> checker) { + static constexpr auto is_leaf = [](VExpr* expr) { return !expr->is_and_expr(); }; + + if (is_leaf(expr)) { + return checker(leaf_index++) ? nullptr : expr; + } else { + VExpr* left_child = dfs_peel_conjunct(expr->children()[0], leaf_index, checker); + VExpr* right_child = dfs_peel_conjunct(expr->children()[1], leaf_index, checker); + + if (left_child != nullptr && right_child != nullptr) { + expr->set_children({left_child, right_child}); + return expr; + } + // here do not close Expr* now + return left_child != nullptr ? left_child : right_child; + } + } }; + } // namespace doris::vectorized namespace apache::thrift { @@ -76,4 +99,5 @@ ThriftStruct from_json_string(const std::string& json_val) { ts.read(&protocol); return ts; } + } // namespace apache::thrift diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py index 4d1f5d3..8bd6829 100755 --- a/gensrc/script/doris_builtins_functions.py +++ b/gensrc/script/doris_builtins_functions.py @@ -823,7 +823,7 @@ visible_functions = [ [['esquery'], 'BOOLEAN', ['VARCHAR', 'VARCHAR'], '_ZN5doris11ESFunctions5matchEPN' - '9doris_udf15FunctionContextERKNS1_9StringValES6_', '', '', '', ''], + '9doris_udf15FunctionContextERKNS1_9StringValES6_', '', '', 'vec', ''], # String builtin functions [['substr', 'substring'], 'VARCHAR', ['VARCHAR', 'INT'], --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org