This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 0f75bd0e38 [fix](delete) fix query result error after delete (#11754) 0f75bd0e38 is described below commit 0f75bd0e385ad582747cd027b683d145ecaf9a38 Author: luozenglin <37725793+luozeng...@users.noreply.github.com> AuthorDate: Mon Aug 15 17:52:03 2022 +0800 [fix](delete) fix query result error after delete (#11754) convert dictionary code for delete predicates. --- be/src/olap/block_column_predicate.h | 13 ++++++++ be/src/olap/rowset/segment_v2/segment_iterator.cpp | 39 ++++++++++++++++++++++ be/src/olap/rowset/segment_v2/segment_iterator.h | 24 +++---------- .../delete_p0/test_segment_iterator_delete.out | 4 +++ .../delete_p0/test_segment_iterator_delete.groovy | 18 ++++++++++ 5 files changed, 78 insertions(+), 20 deletions(-) diff --git a/be/src/olap/block_column_predicate.h b/be/src/olap/block_column_predicate.h index 219a92c3aa..c2830c223b 100644 --- a/be/src/olap/block_column_predicate.h +++ b/be/src/olap/block_column_predicate.h @@ -43,6 +43,9 @@ public: virtual void get_all_column_ids(std::set<ColumnId>& column_id_set) const = 0; + virtual void get_all_column_predicate( + std::set<const ColumnPredicate*>& predicate_set) const = 0; + virtual uint16_t evaluate(vectorized::MutableColumns& block, uint16_t* sel, uint16_t selected_size) const { return selected_size; @@ -68,6 +71,10 @@ public: column_id_set.insert(_predicate->column_id()); }; + void get_all_column_predicate(std::set<const ColumnPredicate*>& predicate_set) const override { + predicate_set.insert(_predicate); + } + uint16_t evaluate(vectorized::MutableColumns& block, uint16_t* sel, uint16_t selected_size) const override; void evaluate_and(vectorized::MutableColumns& block, uint16_t* sel, uint16_t selected_size, @@ -103,6 +110,12 @@ public: } }; + void get_all_column_predicate(std::set<const ColumnPredicate*>& predicate_set) const override { + for (auto child_block_predicate : _block_column_predicate_vec) { + 
child_block_predicate->get_all_column_predicate(predicate_set); + } + } + protected: std::vector<const BlockColumnPredicate*> _block_column_predicate_vec; }; diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index cc728bdbe7..b246352b97 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -751,6 +751,16 @@ void SegmentIterator::_vec_init_lazy_materialization() { std::set<ColumnId> del_cond_id_set; _opts.delete_condition_predicates->get_all_column_ids(del_cond_id_set); + std::set<const ColumnPredicate*> delete_predicate_set {}; + _opts.delete_condition_predicates->get_all_column_predicate(delete_predicate_set); + for (const auto predicate : delete_predicate_set) { + if (PredicateTypeTraits::is_range(predicate->type())) { + _delete_range_column_ids.push_back(predicate->column_id()); + } else if (PredicateTypeTraits::is_bloom_filter(predicate->type())) { + _delete_bloom_filter_column_ids.push_back(predicate->column_id()); + } + } + if (!_col_predicates.empty() || !del_cond_id_set.empty()) { std::set<ColumnId> short_cir_pred_col_id_set; // using set for distinct cid std::set<ColumnId> vec_pred_col_id_set; @@ -1199,6 +1209,35 @@ Status SegmentIterator::next_batch(vectorized::Block* block) { return Status::OK(); } +void SegmentIterator::_convert_dict_code_for_predicate_if_necessary() { + for (auto predicate : _short_cir_eval_predicate) { + _convert_dict_code_for_predicate_if_necessary_impl(predicate); + } + + for (auto predicate : _pre_eval_block_predicate) { + _convert_dict_code_for_predicate_if_necessary_impl(predicate); + } + + for (auto column_id : _delete_range_column_ids) { + _current_return_columns[column_id].get()->convert_dict_codes_if_necessary(); + } + + for (auto column_id : _delete_bloom_filter_column_ids) { + _current_return_columns[column_id].get()->generate_hash_values_for_runtime_filter(); + } +} + +void 
SegmentIterator::_convert_dict_code_for_predicate_if_necessary_impl( + ColumnPredicate* predicate) { + auto& column = _current_return_columns[predicate->column_id()]; + auto* col_ptr = column.get(); + if (PredicateTypeTraits::is_range(predicate->type())) { + col_ptr->convert_dict_codes_if_necessary(); + } else if (PredicateTypeTraits::is_bloom_filter(predicate->type())) { + col_ptr->generate_hash_values_for_runtime_filter(); + } +} + void SegmentIterator::_update_max_row(const vectorized::Block* block) { _estimate_row_size = false; auto avg_row_size = block->bytes() / block->rows(); diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.h b/be/src/olap/rowset/segment_v2/segment_iterator.h index d57889ccbe..66e45db1cc 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.h +++ b/be/src/olap/rowset/segment_v2/segment_iterator.h @@ -134,27 +134,9 @@ private: bool _can_evaluated_by_vectorized(ColumnPredicate* predicate); // Dictionary column should do something to initial. - void _convert_dict_code_for_predicate_if_necessary() { - for (auto predicate : _short_cir_eval_predicate) { - auto& column = _current_return_columns[predicate->column_id()]; - auto* col_ptr = column.get(); - if (PredicateTypeTraits::is_range(predicate->type())) { - col_ptr->convert_dict_codes_if_necessary(); - } else if (PredicateTypeTraits::is_bloom_filter(predicate->type())) { - col_ptr->generate_hash_values_for_runtime_filter(); - } - } + void _convert_dict_code_for_predicate_if_necessary(); - for (auto predicate : _pre_eval_block_predicate) { - auto& column = _current_return_columns[predicate->column_id()]; - auto* col_ptr = column.get(); - if (PredicateTypeTraits::is_range(predicate->type())) { - col_ptr->convert_dict_codes_if_necessary(); - } else if (PredicateTypeTraits::is_bloom_filter(predicate->type())) { - col_ptr->generate_hash_values_for_runtime_filter(); - } - } - } + void _convert_dict_code_for_predicate_if_necessary_impl(ColumnPredicate* predicate); void 
_update_max_row(const vectorized::Block* block); @@ -198,6 +180,8 @@ private: vectorized::MutableColumns _current_return_columns; std::vector<ColumnPredicate*> _pre_eval_block_predicate; std::vector<ColumnPredicate*> _short_cir_eval_predicate; + std::vector<uint32_t> _delete_range_column_ids; + std::vector<uint32_t> _delete_bloom_filter_column_ids; // when lazy materialization is enable, segmentIter need to read data at least twice // first, read predicate columns by various index // second, read non-predicate columns diff --git a/regression-test/data/delete_p0/test_segment_iterator_delete.out b/regression-test/data/delete_p0/test_segment_iterator_delete.out index 41a422e4f4..9dd338b435 100644 --- a/regression-test/data/delete_p0/test_segment_iterator_delete.out +++ b/regression-test/data/delete_p0/test_segment_iterator_delete.out @@ -77,3 +77,7 @@ 4 4 4 6 6 6 +-- !sql -- +1 SSS sk6S0 +1 ttt zdges + diff --git a/regression-test/suites/delete_p0/test_segment_iterator_delete.groovy b/regression-test/suites/delete_p0/test_segment_iterator_delete.groovy index 177299a447..1a357bbff4 100644 --- a/regression-test/suites/delete_p0/test_segment_iterator_delete.groovy +++ b/regression-test/suites/delete_p0/test_segment_iterator_delete.groovy @@ -17,12 +17,25 @@ suite("test_segment_iterator_delete") { def tableName = "delete_regression_test_segment_iterator" + def tableName_dict = "delete_regression_test_segment_iterator_column_dictionary" // test duplicate key sql """ DROP TABLE IF EXISTS ${tableName} """ sql """ CREATE TABLE ${tableName} (c1 int NOT NULL, c2 int NOT NULL , c3 int not null ) ENGINE=OLAP DUPLICATE KEY(c1, c2) COMMENT "OLAP" DISTRIBUTED BY HASH(c3) BUCKETS 1 PROPERTIES ( "replication_num" = "1" );""" + sql """ DROP TABLE IF EXISTS ${tableName_dict} """ + sql """ + CREATE TABLE ${tableName_dict} ( + `tinyint_key` tinyint(4) NOT NULL, + `char_50_key` char(50) NOT NULL, + `character_key` varchar(500) NOT NULL + ) ENGINE=OLAP + AGGREGATE KEY(`tinyint_key`, 
`char_50_key`, `character_key`) + DISTRIBUTED BY HASH(`tinyint_key`) BUCKETS 1 + PROPERTIES ( "replication_num" = "1" ); + """ + sql """INSERT INTO ${tableName} VALUES (1,1,1)""" sql """INSERT INTO ${tableName} VALUES (2,2,2)""" sql """INSERT INTO ${tableName} VALUES (3,3,3)""" @@ -90,4 +103,9 @@ suite("test_segment_iterator_delete") { qt_sql """select /*+ SET_VAR(enable_vectorized_engine=true) */ * from ${tableName};""" sql """drop table ${tableName} force""" + + // delete ColumnDictionary + sql """INSERT INTO ${tableName_dict} VALUES(1, 'dddd', 'adgs'), (1, 'SSS', 'sk6S0'), (1, 'ttt', 'zdges');""" + sql """delete from ${tableName_dict} where character_key < "sk6S0";""" + qt_sql """select /*+ SET_VAR(enable_vectorized_engine=true) */ * from ${tableName_dict} order by tinyint_key, char_50_key;""" } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org For additional commands, e-mail: commits-help@doris.apache.org