This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 43915936b6 [refactor] add evaluate_and_vec() for ComparisonPredicateBase (#10631)
43915936b6 is described below

commit 43915936b6857f8becc3ab06476abbe2ec4ae6db
Author: minghong <minghong.z...@163.com>
AuthorDate: Fri Jul 8 14:47:37 2022 +0800

    [refactor] add evaluate_and_vec() for ComparisonPredicateBase (#10631)
---
 be/src/olap/column_predicate.h                     |  5 +-
 be/src/olap/comparison_predicate.h                 | 54 ++++++++++++++++------
 be/src/olap/rowset/segment_v2/segment_iterator.cpp | 28 +++++------
 be/src/olap/rowset/segment_v2/segment_iterator.h   | 25 +++++++++-
 4 files changed, 80 insertions(+), 32 deletions(-)

diff --git a/be/src/olap/column_predicate.h b/be/src/olap/column_predicate.h
index be7a1bdc59..b2480fe672 100644
--- a/be/src/olap/column_predicate.h
+++ b/be/src/olap/column_predicate.h
@@ -92,7 +92,10 @@ public:
     virtual void evaluate_vec(const vectorized::IColumn& column, uint16_t 
size, bool* flags) const {
         DCHECK(false) << "should not reach here";
     }
-
+    virtual void evaluate_and_vec(const vectorized::IColumn& column, uint16_t 
size,
+                                  bool* flags) const {
+        DCHECK(false) << "should not reach here";
+    }
     uint32_t column_id() const { return _column_id; }
 
 protected:
diff --git a/be/src/olap/comparison_predicate.h b/be/src/olap/comparison_predicate.h
index f0401aa4cc..8d1c0ff604 100644
--- a/be/src/olap/comparison_predicate.h
+++ b/be/src/olap/comparison_predicate.h
@@ -160,8 +160,9 @@ public:
         _evaluate_bit<false>(column, sel, size, flags);
     }
 
-    void evaluate_vec(const vectorized::IColumn& column, uint16_t size,
-                      bool* flags) const override {
+    template <bool is_and>
+    __attribute__((flatten)) void _evaluate_vec_internal(const 
vectorized::IColumn& column,
+                                                         uint16_t size, bool* 
flags) const {
         if (column.is_nullable()) {
             auto* nullable_column_ptr =
                     
vectorized::check_and_get_column<vectorized::ColumnNullable>(column);
@@ -180,7 +181,8 @@ public:
                                                  : 
dict_column_ptr->find_code(_value);
                     auto* data_array = dict_column_ptr->get_data().data();
 
-                    _base_loop_vec<true>(size, flags, null_map.data(), 
data_array, dict_code);
+                    _base_loop_vec<true, is_and>(size, flags, null_map.data(), 
data_array,
+                                                 dict_code);
                 } else {
                     LOG(FATAL) << "column_dictionary must use StringValue 
predicate.";
                 }
@@ -190,7 +192,7 @@ public:
                                            .get_data()
                                            .data();
 
-                _base_loop_vec<true>(size, flags, null_map.data(), data_array, 
_value_real);
+                _base_loop_vec<true, is_and>(size, flags, null_map.data(), 
data_array, _value_real);
             }
         } else {
             if (column.is_column_dictionary()) {
@@ -202,7 +204,7 @@ public:
                                                  : 
dict_column_ptr->find_code(_value);
                     auto* data_array = dict_column_ptr->get_data().data();
 
-                    _base_loop_vec<false>(size, flags, nullptr, data_array, 
dict_code);
+                    _base_loop_vec<false, is_and>(size, flags, nullptr, 
data_array, dict_code);
                 } else {
                     LOG(FATAL) << "column_dictionary must use StringValue 
predicate.";
                 }
@@ -213,7 +215,7 @@ public:
                                 ->get_data()
                                 .data();
 
-                _base_loop_vec<false>(size, flags, nullptr, data_array, 
_value_real);
+                _base_loop_vec<false, is_and>(size, flags, nullptr, 
data_array, _value_real);
             }
         }
 
@@ -224,6 +226,16 @@ public:
         }
     }
 
+    void evaluate_vec(const vectorized::IColumn& column, uint16_t size,
+                      bool* flags) const override {
+        _evaluate_vec_internal<false>(column, size, flags);
+    }
+
+    void evaluate_and_vec(const vectorized::IColumn& column, uint16_t size,
+                          bool* flags) const override {
+        _evaluate_vec_internal<true>(column, size, flags);
+    }
+
 private:
     using TReal = std::conditional_t<std::is_same_v<T, uint24_t>, uint32_t, T>;
 
@@ -313,14 +325,28 @@ private:
         }
     }
 
-    template <bool is_nullable, typename TArray, typename TValue>
-    void _base_loop_vec(uint16_t size, bool* __restrict flags, const uint8_t* 
__restrict null_map,
-                        const TArray* __restrict data_array, const TValue& 
value) const {
-        for (uint16_t i = 0; i < size; i++) {
-            if constexpr (is_nullable) {
-                flags[i] = !null_map[i] && _operator(data_array[i], value);
-            } else {
-                flags[i] = _operator(data_array[i], value);
+    template <bool is_nullable, bool is_and, typename TArray, typename TValue>
+    __attribute__((flatten)) void _base_loop_vec(uint16_t size, bool* 
__restrict bflags,
+                                                 const uint8_t* __restrict 
null_map,
+                                                 const TArray* __restrict 
data_array,
+                                                 const TValue& value) const {
+        //uint8_t helps compiler to generate vectorized code
+        uint8_t* flags = reinterpret_cast<uint8_t*>(bflags);
+        if constexpr (is_and) {
+            for (uint16_t i = 0; i < size; i++) {
+                if constexpr (is_nullable) {
+                    flags[i] &= (uint8_t)(!null_map[i] && 
_operator(data_array[i], value));
+                } else {
+                    flags[i] &= (uint8_t)_operator(data_array[i], value);
+                }
+            }
+        } else {
+            for (uint16_t i = 0; i < size; i++) {
+                if constexpr (is_nullable) {
+                    flags[i] = !null_map[i] && _operator(data_array[i], value);
+                } else {
+                    flags[i] = _operator(data_array[i], value);
+                }
             }
         }
     }
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index d1a70933c9..0e81609005 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -661,11 +661,7 @@ void SegmentIterator::_vec_init_lazy_materialization() {
             // Step1: check pred using short eval or vec eval
             if (_can_evaluated_by_vectorized(predicate)) {
                 vec_pred_col_id_set.insert(predicate->column_id());
-                if (_pre_eval_block_predicate == nullptr) {
-                    _pre_eval_block_predicate.reset(new 
AndBlockColumnPredicate());
-                }
-                _pre_eval_block_predicate->add_column_predicate(
-                        new SingleColumnBlockPredicate(predicate));
+                _pre_eval_block_predicate.push_back(predicate);
             } else {
                 short_cir_pred_col_id_set.insert(cid);
                 _short_cir_eval_predicate.push_back(predicate);
@@ -879,8 +875,16 @@ uint16_t SegmentIterator::_evaluate_vectorization_predicate(uint16_t* sel_rowid_
     }
 
     uint16_t original_size = selected_size;
-    bool ret_flags[selected_size];
-    _pre_eval_block_predicate->evaluate_vec(_current_return_columns, 
selected_size, ret_flags);
+    bool ret_flags[original_size];
+    DCHECK(_pre_eval_block_predicate.size() > 0);
+    auto column_id = _pre_eval_block_predicate[0]->column_id();
+    auto& column = _current_return_columns[column_id];
+    _pre_eval_block_predicate[0]->evaluate_vec(*column, original_size, 
ret_flags);
+    for (int i = 1; i < _pre_eval_block_predicate.size(); i++) {
+        auto column_id2 = _pre_eval_block_predicate[i]->column_id();
+        auto& column2 = _current_return_columns[column_id2];
+        _pre_eval_block_predicate[i]->evaluate_and_vec(*column2, 
original_size, ret_flags);
+    }
 
     uint16_t new_size = 0;
 
@@ -928,15 +932,6 @@ uint16_t SegmentIterator::_evaluate_short_circuit_predicate(uint16_t* vec_sel_ro
     for (auto predicate : _short_cir_eval_predicate) {
         auto column_id = predicate->column_id();
         auto& short_cir_column = _current_return_columns[column_id];
-        auto* col_ptr = short_cir_column.get();
-
-        // Dictionary column should do something to initial.
-        if (PredicateTypeTraits::is_range(predicate->type())) {
-            col_ptr->convert_dict_codes_if_necessary();
-        } else if (PredicateTypeTraits::is_bloom_filter(predicate->type())) {
-            col_ptr->generate_hash_values_for_runtime_filter();
-        }
-
         selected_size = predicate->evaluate(*short_cir_column, 
vec_sel_rowid_idx, selected_size);
     }
     _opts.stats->rows_vec_cond_filtered += original_size - selected_size;
@@ -1024,6 +1019,7 @@ Status SegmentIterator::next_batch(vectorized::Block* block) {
     if (!_is_need_vec_eval && !_is_need_short_eval) {
         _output_non_pred_columns(block);
     } else {
+        _convert_dict_code_for_predicate_if_necessary();
         uint16_t selected_size = nrows_read;
         uint16_t sel_rowid_idx[selected_size];
 
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.h b/be/src/olap/rowset/segment_v2/segment_iterator.h
index b56baf2888..195307d9f5 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.h
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.h
@@ -122,6 +122,29 @@ private:
 
     bool _can_evaluated_by_vectorized(ColumnPredicate* predicate);
 
+    // Dictionary column should do something to initial.
+    void _convert_dict_code_for_predicate_if_necessary() {
+        for (auto predicate : _short_cir_eval_predicate) {
+            auto& column = _current_return_columns[predicate->column_id()];
+            auto* col_ptr = column.get();
+            if (PredicateTypeTraits::is_range(predicate->type())) {
+                col_ptr->convert_dict_codes_if_necessary();
+            } else if 
(PredicateTypeTraits::is_bloom_filter(predicate->type())) {
+                col_ptr->generate_hash_values_for_runtime_filter();
+            }
+        }
+
+        for (auto predicate : _pre_eval_block_predicate) {
+            auto& column = _current_return_columns[predicate->column_id()];
+            auto* col_ptr = column.get();
+            if (PredicateTypeTraits::is_range(predicate->type())) {
+                col_ptr->convert_dict_codes_if_necessary();
+            } else if 
(PredicateTypeTraits::is_bloom_filter(predicate->type())) {
+                col_ptr->generate_hash_values_for_runtime_filter();
+            }
+        }
+    }
+
 private:
     class BitmapRangeIterator;
 
@@ -159,7 +182,7 @@ private:
             _short_cir_pred_column_ids; // keep columnId of columns for short 
circuit predicate evaluation
     std::vector<bool> _is_pred_column; // columns hold by segmentIter
     vectorized::MutableColumns _current_return_columns;
-    std::unique_ptr<AndBlockColumnPredicate> _pre_eval_block_predicate;
+    std::vector<ColumnPredicate*> _pre_eval_block_predicate;
     std::vector<ColumnPredicate*> _short_cir_eval_predicate;
     // when lazy materialization is enable, segmentIter need to read data at 
least twice
     // first, read predicate columns by various index


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to