This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 43915936b6 [refactor] add evaluate_and_vec() for ComparisonPredicateBase (#10631) 43915936b6 is described below commit 43915936b6857f8becc3ab06476abbe2ec4ae6db Author: minghong <minghong.z...@163.com> AuthorDate: Fri Jul 8 14:47:37 2022 +0800 [refactor] add evaluate_and_vec() for ComparisonPredicateBase (#10631) --- be/src/olap/column_predicate.h | 5 +- be/src/olap/comparison_predicate.h | 54 ++++++++++++++++------ be/src/olap/rowset/segment_v2/segment_iterator.cpp | 28 +++++------ be/src/olap/rowset/segment_v2/segment_iterator.h | 25 +++++++++- 4 files changed, 80 insertions(+), 32 deletions(-) diff --git a/be/src/olap/column_predicate.h b/be/src/olap/column_predicate.h index be7a1bdc59..b2480fe672 100644 --- a/be/src/olap/column_predicate.h +++ b/be/src/olap/column_predicate.h @@ -92,7 +92,10 @@ public: virtual void evaluate_vec(const vectorized::IColumn& column, uint16_t size, bool* flags) const { DCHECK(false) << "should not reach here"; } - + virtual void evaluate_and_vec(const vectorized::IColumn& column, uint16_t size, + bool* flags) const { + DCHECK(false) << "should not reach here"; + } uint32_t column_id() const { return _column_id; } protected: diff --git a/be/src/olap/comparison_predicate.h b/be/src/olap/comparison_predicate.h index f0401aa4cc..8d1c0ff604 100644 --- a/be/src/olap/comparison_predicate.h +++ b/be/src/olap/comparison_predicate.h @@ -160,8 +160,9 @@ public: _evaluate_bit<false>(column, sel, size, flags); } - void evaluate_vec(const vectorized::IColumn& column, uint16_t size, - bool* flags) const override { + template <bool is_and> + __attribute__((flatten)) void _evaluate_vec_internal(const vectorized::IColumn& column, + uint16_t size, bool* flags) const { if (column.is_nullable()) { auto* nullable_column_ptr = vectorized::check_and_get_column<vectorized::ColumnNullable>(column); @@ -180,7 +181,8 @@ public: : dict_column_ptr->find_code(_value); auto* data_array = dict_column_ptr->get_data().data(); - _base_loop_vec<true>(size, flags, null_map.data(), data_array, dict_code); + _base_loop_vec<true, is_and>(size, flags, null_map.data(), data_array, + dict_code); } else { LOG(FATAL) << "column_dictionary must use StringValue predicate."; } @@ -190,7 +192,7 @@ public: .get_data() .data(); - _base_loop_vec<true>(size, flags, null_map.data(), data_array, _value_real); + _base_loop_vec<true, is_and>(size, flags, null_map.data(), data_array, _value_real); } } else { if (column.is_column_dictionary()) { @@ -202,7 +204,7 @@ public: : dict_column_ptr->find_code(_value); auto* data_array = dict_column_ptr->get_data().data(); - _base_loop_vec<false>(size, flags, nullptr, data_array, dict_code); + _base_loop_vec<false, is_and>(size, flags, nullptr, data_array, dict_code); } else { LOG(FATAL) << "column_dictionary must use StringValue predicate."; } @@ -213,7 +215,7 @@ public: ->get_data() .data(); - _base_loop_vec<false>(size, flags, nullptr, data_array, _value_real); + _base_loop_vec<false, is_and>(size, flags, nullptr, data_array, _value_real); } } @@ -224,6 +226,16 @@ public: } } + void evaluate_vec(const vectorized::IColumn& column, uint16_t size, + bool* flags) const override { + _evaluate_vec_internal<false>(column, size, flags); + } + + void evaluate_and_vec(const vectorized::IColumn& column, uint16_t size, + bool* flags) const override { + _evaluate_vec_internal<true>(column, size, flags); + } + private: using TReal = std::conditional_t<std::is_same_v<T, uint24_t>, uint32_t, T>; @@ -313,14 +325,28 @@ private: } } - template <bool is_nullable, typename TArray, typename TValue> - void _base_loop_vec(uint16_t size, bool* __restrict flags, const uint8_t* __restrict null_map, - const TArray* __restrict data_array, const TValue& value) const { - for (uint16_t i = 0; i < size; i++) { - if constexpr (is_nullable) { - flags[i] = !null_map[i] && _operator(data_array[i], value); - } else { - flags[i] = _operator(data_array[i], value); + template <bool is_nullable, bool is_and, typename TArray, typename TValue> + __attribute__((flatten)) void _base_loop_vec(uint16_t size, bool* __restrict bflags, + const uint8_t* __restrict null_map, + const TArray* __restrict data_array, + const TValue& value) const { + //uint8_t helps compiler to generate vectorized code + uint8_t* flags = reinterpret_cast<uint8_t*>(bflags); + if constexpr (is_and) { + for (uint16_t i = 0; i < size; i++) { + if constexpr (is_nullable) { + flags[i] &= (uint8_t)(!null_map[i] && _operator(data_array[i], value)); + } else { + flags[i] &= (uint8_t)_operator(data_array[i], value); + } + } + } else { + for (uint16_t i = 0; i < size; i++) { + if constexpr (is_nullable) { + flags[i] = !null_map[i] && _operator(data_array[i], value); + } else { + flags[i] = _operator(data_array[i], value); + } } } } diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index d1a70933c9..0e81609005 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -661,11 +661,7 @@ void SegmentIterator::_vec_init_lazy_materialization() { // Step1: check pred using short eval or vec eval if (_can_evaluated_by_vectorized(predicate)) { vec_pred_col_id_set.insert(predicate->column_id()); - if (_pre_eval_block_predicate == nullptr) { - _pre_eval_block_predicate.reset(new AndBlockColumnPredicate()); - } - _pre_eval_block_predicate->add_column_predicate( - new SingleColumnBlockPredicate(predicate)); + _pre_eval_block_predicate.push_back(predicate); } else { short_cir_pred_col_id_set.insert(cid); _short_cir_eval_predicate.push_back(predicate); @@ -879,8 +875,16 @@ uint16_t SegmentIterator::_evaluate_vectorization_predicate(uint16_t* sel_rowid_ } uint16_t original_size = selected_size; - bool ret_flags[selected_size]; - _pre_eval_block_predicate->evaluate_vec(_current_return_columns, selected_size, ret_flags); + bool ret_flags[original_size]; + DCHECK(_pre_eval_block_predicate.size() > 0); + auto column_id = _pre_eval_block_predicate[0]->column_id(); + auto& column = _current_return_columns[column_id]; + _pre_eval_block_predicate[0]->evaluate_vec(*column, original_size, ret_flags); + for (int i = 1; i < _pre_eval_block_predicate.size(); i++) { + auto column_id2 = _pre_eval_block_predicate[i]->column_id(); + auto& column2 = _current_return_columns[column_id2]; + _pre_eval_block_predicate[i]->evaluate_and_vec(*column2, original_size, ret_flags); + } uint16_t new_size = 0; @@ -928,15 +932,6 @@ uint16_t SegmentIterator::_evaluate_short_circuit_predicate(uint16_t* vec_sel_ro for (auto predicate : _short_cir_eval_predicate) { auto column_id = predicate->column_id(); auto& short_cir_column = _current_return_columns[column_id]; - auto* col_ptr = short_cir_column.get(); - - // Dictionary column should do something to initial. - if (PredicateTypeTraits::is_range(predicate->type())) { - col_ptr->convert_dict_codes_if_necessary(); - } else if (PredicateTypeTraits::is_bloom_filter(predicate->type())) { - col_ptr->generate_hash_values_for_runtime_filter(); - } - selected_size = predicate->evaluate(*short_cir_column, vec_sel_rowid_idx, selected_size); } _opts.stats->rows_vec_cond_filtered += original_size - selected_size; @@ -1024,6 +1019,7 @@ Status SegmentIterator::next_batch(vectorized::Block* block) { if (!_is_need_vec_eval && !_is_need_short_eval) { _output_non_pred_columns(block); } else { + _convert_dict_code_for_predicate_if_necessary(); uint16_t selected_size = nrows_read; uint16_t sel_rowid_idx[selected_size]; diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.h b/be/src/olap/rowset/segment_v2/segment_iterator.h index b56baf2888..195307d9f5 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.h +++ b/be/src/olap/rowset/segment_v2/segment_iterator.h @@ -122,6 +122,29 @@ private: bool _can_evaluated_by_vectorized(ColumnPredicate* predicate); + // Dictionary column should do something to initial. + void _convert_dict_code_for_predicate_if_necessary() { + for (auto predicate : _short_cir_eval_predicate) { + auto& column = _current_return_columns[predicate->column_id()]; + auto* col_ptr = column.get(); + if (PredicateTypeTraits::is_range(predicate->type())) { + col_ptr->convert_dict_codes_if_necessary(); + } else if (PredicateTypeTraits::is_bloom_filter(predicate->type())) { + col_ptr->generate_hash_values_for_runtime_filter(); + } + } + + for (auto predicate : _pre_eval_block_predicate) { + auto& column = _current_return_columns[predicate->column_id()]; + auto* col_ptr = column.get(); + if (PredicateTypeTraits::is_range(predicate->type())) { + col_ptr->convert_dict_codes_if_necessary(); + } else if (PredicateTypeTraits::is_bloom_filter(predicate->type())) { + col_ptr->generate_hash_values_for_runtime_filter(); + } + } + } + private: class BitmapRangeIterator; @@ -159,7 +182,7 @@ private: _short_cir_pred_column_ids; // keep columnId of columns for short circuit predicate evaluation std::vector<bool> _is_pred_column; // columns hold by segmentIter vectorized::MutableColumns _current_return_columns; - std::unique_ptr<AndBlockColumnPredicate> _pre_eval_block_predicate; + std::vector<ColumnPredicate*> _pre_eval_block_predicate; std::vector<ColumnPredicate*> _short_cir_eval_predicate; // when lazy materialization is enable, segmentIter need to read data at least twice // first, read predicate columns by various index --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org