This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 1d26b4d6c2 [improvement](predicate) Cache the dict code in ComparisonPredicate (#17684) 1d26b4d6c2 is described below commit 1d26b4d6c288bbe012bf4fc92585aeed194eeded Author: Jerry Hu <mrh...@gmail.com> AuthorDate: Sun Mar 19 17:37:28 2023 +0800 [improvement](predicate) Cache the dict code in ComparisonPredicate (#17684) --- be/src/olap/column_predicate.h | 5 ++ be/src/olap/comparison_predicate.h | 72 ++++++++++++++++------ be/src/olap/rowset/segment_v2/segment_iterator.cpp | 16 ++++- be/src/olap/rowset/segment_v2/segment_iterator.h | 2 + 4 files changed, 73 insertions(+), 22 deletions(-) diff --git a/be/src/olap/column_predicate.h b/be/src/olap/column_predicate.h index f3987a006e..6cc6ec8ad2 100644 --- a/be/src/olap/column_predicate.h +++ b/be/src/olap/column_predicate.h @@ -192,6 +192,11 @@ public: ", opposite=" + (_opposite ? "true" : "false"); } + /// Some predicates need to be cloned for each segment. + virtual bool need_to_clone() const { return false; } + + virtual void clone(ColumnPredicate** to) const { LOG(FATAL) << "clone not supported"; } + std::shared_ptr<PredicateParams> predicate_params() { return _predicate_params; } const std::string pred_type_string(PredicateType type) { diff --git a/be/src/olap/comparison_predicate.h b/be/src/olap/comparison_predicate.h index 26d97a27ed..e2ca003b9b 100644 --- a/be/src/olap/comparison_predicate.h +++ b/be/src/olap/comparison_predicate.h @@ -32,7 +32,15 @@ class ComparisonPredicateBase : public ColumnPredicate { public: using T = typename PredicatePrimitiveTypeTraits<Type>::PredicateFieldType; ComparisonPredicateBase(uint32_t column_id, const T& value, bool opposite = false) - : ColumnPredicate(column_id, opposite), _value(value) {} + : ColumnPredicate(column_id, opposite), + _cached_code(_InvalidateCodeValue), + _value(value) {} + + void clone(ColumnPredicate** to) const override { + *to = new ComparisonPredicateBase(_column_id, _value, _opposite); + } + + bool need_to_clone() const override { return true; } PredicateType type() const override { return PT; } @@ -258,13 +266,20 @@ public: auto* dict_column_ptr = vectorized::check_and_get_column<vectorized::ColumnDictI32>( nested_column); - auto dict_code = _is_range() ? dict_column_ptr->find_code_by_bound( - _value, _is_greater(), _is_eq()) - : dict_column_ptr->find_code(_value); - auto* data_array = dict_column_ptr->get_data().data(); - _base_loop_vec<true, is_and>(size, flags, null_map.data(), data_array, - dict_code); + auto dict_code = _find_code_from_dictionary_column(*dict_column_ptr); + do { + if constexpr (PT == PredicateType::EQ) { + if (dict_code == -2) { + memset(flags, 0, size); + break; + } + } + auto* data_array = dict_column_ptr->get_data().data(); + + _base_loop_vec<true, is_and>(size, flags, null_map.data(), data_array, + dict_code); + } while (false); } else { LOG(FATAL) << "column_dictionary must use StringRef predicate."; } @@ -281,12 +296,18 @@ public: if constexpr (std::is_same_v<T, StringRef>) { auto* dict_column_ptr = vectorized::check_and_get_column<vectorized::ColumnDictI32>(column); - auto dict_code = _is_range() ? dict_column_ptr->find_code_by_bound( - _value, _is_greater(), _is_eq()) - : dict_column_ptr->find_code(_value); - auto* data_array = dict_column_ptr->get_data().data(); - - _base_loop_vec<false, is_and>(size, flags, nullptr, data_array, dict_code); + auto dict_code = _find_code_from_dictionary_column(*dict_column_ptr); + do { + if constexpr (PT == PredicateType::EQ) { + if (dict_code == -2) { + memset(flags, 0, size); + break; + } + } + auto* data_array = dict_column_ptr->get_data().data(); + + _base_loop_vec<false, is_and>(size, flags, nullptr, data_array, dict_code); + } while (false); } else { LOG(FATAL) << "column_dictionary must use StringRef predicate."; } @@ -461,9 +482,7 @@ private: auto* dict_column_ptr = vectorized::check_and_get_column<vectorized::ColumnDictI32>(column); auto* data_array = dict_column_ptr->get_data().data(); - auto dict_code = _is_range() ? dict_column_ptr->find_code_by_bound( - _value, _operator(1, 0), _operator(1, 1)) - : dict_column_ptr->find_code(_value); + auto dict_code = _find_code_from_dictionary_column(*dict_column_ptr); _base_loop_bit<is_nullable, is_and>(sel, size, flags, null_map, data_array, dict_code); } else { @@ -507,9 +526,13 @@ private: auto* dict_column_ptr = vectorized::check_and_get_column<vectorized::ColumnDictI32>(column); auto* data_array = dict_column_ptr->get_data().data(); - auto dict_code = _is_range() ? dict_column_ptr->find_code_by_bound( - _value, _is_greater(), _is_eq()) - : dict_column_ptr->find_code(_value); + auto dict_code = _find_code_from_dictionary_column(*dict_column_ptr); + + if constexpr (PT == PredicateType::EQ) { + if (dict_code == -2) { + return _opposite ? size : 0; + } + } return _base_loop<is_nullable>(sel, size, null_map, data_array, dict_code); } else { @@ -527,12 +550,23 @@ private: } } + __attribute__((flatten)) int32_t _find_code_from_dictionary_column( + const vectorized::ColumnDictI32& column) const { + if (UNLIKELY(_cached_code == _InvalidateCodeValue)) { + _cached_code = _is_range() ? column.find_code_by_bound(_value, _is_greater(), _is_eq()) + : column.find_code(_value); + } + return _cached_code; + } + std::string _debug_string() const override { std::string info = "ComparisonPredicateBase(" + type_to_string(Type) + ", " + type_to_string(PT) + ")"; return info; } + static constexpr int32_t _InvalidateCodeValue = std::numeric_limits<int32_t>::max(); + mutable int32_t _cached_code; T _value; }; diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index 7d73492be7..0292fa5a90 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -157,7 +157,8 @@ SegmentIterator::SegmentIterator(std::shared_ptr<Segment> segment, const Schema& _lazy_materialization_read(false), _inited(false), _estimate_row_size(true), - _wait_times_estimate_row_size(10) {} + _wait_times_estimate_row_size(10), + _pool(new ObjectPool) {} SegmentIterator::~SegmentIterator() { for (auto iter : _column_iterators) { @@ -173,9 +174,18 @@ SegmentIterator::~SegmentIterator() { Status SegmentIterator::init(const StorageReadOptions& opts) { _opts = opts; - if (!opts.column_predicates.empty()) { - _col_predicates = opts.column_predicates; + + for (auto& predicate : opts.column_predicates) { + if (predicate->need_to_clone()) { + ColumnPredicate* cloned; + predicate->clone(&cloned); + _pool->add(cloned); + _col_predicates.emplace_back(cloned); + } else { + _col_predicates.emplace_back(predicate); + } } + // Read options will not change, so that just resize here _block_rowids.resize(_opts.block_row_max); if (!opts.column_predicates_except_leafnode_of_andnode.empty()) { diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.h b/be/src/olap/rowset/segment_v2/segment_iterator.h index 17af761071..7e8e1d797e 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.h +++ b/be/src/olap/rowset/segment_v2/segment_iterator.h @@ -397,6 +397,8 @@ private: // used for compaction, record selectd rowids of current batch uint16_t _selected_size; vector<uint16_t> _sel_rowid_idx; + + std::unique_ptr<ObjectPool> _pool; }; } // namespace segment_v2 --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org