This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git

The following commit(s) were added to refs/heads/master by this push:
     new 9c52b4a508 [enhance] improve dict in-predicate evaluate (#10009)
9c52b4a508 is described below

commit 9c52b4a508f7c4865ae077e286ea0b7794efa03b
Author: minghong <minghong.z...@163.com>
AuthorDate: Thu Jun 9 00:25:30 2022 +0800

    [enhance] improve dict in-predicate evaluate (#10009)
---
 be/src/olap/in_list_predicate.cpp      | 13 ++++++++-----
 be/src/vec/columns/column_dictionary.h | 17 +++++++++--------
 2 files changed, 17 insertions(+), 13 deletions(-)

diff --git a/be/src/olap/in_list_predicate.cpp b/be/src/olap/in_list_predicate.cpp
index b33ef09c49..82c8241368 100644
--- a/be/src/olap/in_list_predicate.cpp
+++ b/be/src/olap/in_list_predicate.cpp
@@ -132,13 +132,14 @@ IN_LIST_PRED_COLUMN_BLOCK_EVALUATE(NotInListPredicate, ==)
                 auto* nested_col_ptr = vectorized::check_and_get_column< \
                         vectorized::ColumnDictionary<vectorized::Int32>>(nested_col); \
                 auto& data_array = nested_col_ptr->get_data(); \
-                auto dict_codes = nested_col_ptr->find_codes(_values); \
+                std::vector<bool> selected; \
+                nested_col_ptr->find_codes(_values, selected); \
                 for (uint16_t i = 0; i < *size; i++) { \
                     uint16_t idx = sel[i]; \
                     sel[new_size] = idx; \
                     const auto& cell_value = data_array[idx]; \
-                    bool ret = !null_bitmap[idx] && \
-                               (dict_codes.find(cell_value) OP dict_codes.end()); \
+                    DCHECK(cell_value < selected.size()); \
+                    bool ret = !null_bitmap[idx] && (selected[cell_value] OP false); \
                     new_size += _opposite ? !ret : ret; \
                 } \
             } \
@@ -161,12 +162,14 @@ IN_LIST_PRED_COLUMN_BLOCK_EVALUATE(NotInListPredicate, ==)
                         reinterpret_cast<vectorized::ColumnDictionary<vectorized::Int32>&>( \
                                 column); \
                 auto& data_array = dict_col.get_data(); \
-                auto dict_codes = dict_col.find_codes(_values); \
+                std::vector<bool> selected; \
+                dict_col.find_codes(_values, selected); \
                 for (uint16_t i = 0; i < *size; i++) { \
                     uint16_t idx = sel[i]; \
                     sel[new_size] = idx; \
                     const auto& cell_value = data_array[idx]; \
-                    auto result = (dict_codes.find(cell_value) OP dict_codes.end()); \
+                    DCHECK(cell_value < selected.size()); \
+                    auto result = (selected[cell_value] OP false); \
                     new_size += _opposite ? !result : result; \
                 } \
             } \
diff --git a/be/src/vec/columns/column_dictionary.h b/be/src/vec/columns/column_dictionary.h
index 532d7eb6a7..29db3a334c 100644
--- a/be/src/vec/columns/column_dictionary.h
+++ b/be/src/vec/columns/column_dictionary.h
@@ -258,9 +258,9 @@ public:
 
     uint32_t get_hash_value(uint32_t idx) const { return _dict.get_hash_value(_codes[idx]); }
 
-    phmap::flat_hash_set<int32_t> find_codes(
-            const phmap::flat_hash_set<StringValue>& values) const {
-        return _dict.find_codes(values);
+    void find_codes(const phmap::flat_hash_set<StringValue>& values,
+                    std::vector<bool>& selected) const {
+        return _dict.find_codes(values, selected);
     }
 
     bool is_dict_sorted() const { return _dict_sorted; }
@@ -362,16 +362,17 @@ public:
             return greater ? bound - greater + eq : bound - eq;
         }
 
-        phmap::flat_hash_set<int32_t> find_codes(
-                const phmap::flat_hash_set<StringValue>& values) const {
-            phmap::flat_hash_set<int32_t> code_set;
+        void find_codes(const phmap::flat_hash_set<StringValue>& values,
+                        std::vector<bool>& selected) const {
+            size_t dict_word_num = _dict_data.size();
+            selected.resize(dict_word_num);
+            selected.assign(dict_word_num, false);
             for (const auto& value : values) {
                 auto it = _inverted_index.find(value);
                 if (it != _inverted_index.end()) {
-                    code_set.insert(it->second);
+                    selected[it->second] = true;
                 }
             }
-            return code_set;
         }
 
         void clear() {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org