This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 0f75bd0e38 [fix](delete) fix query result error after delete (#11754)
0f75bd0e38 is described below

commit 0f75bd0e385ad582747cd027b683d145ecaf9a38
Author: luozenglin <37725793+luozeng...@users.noreply.github.com>
AuthorDate: Mon Aug 15 17:52:03 2022 +0800

    [fix](delete) fix query result error after delete (#11754)
    
    Convert dictionary codes for the columns referenced by delete predicates.
---
 be/src/olap/block_column_predicate.h               | 13 ++++++++
 be/src/olap/rowset/segment_v2/segment_iterator.cpp | 39 ++++++++++++++++++++++
 be/src/olap/rowset/segment_v2/segment_iterator.h   | 24 +++----------
 .../delete_p0/test_segment_iterator_delete.out     |  4 +++
 .../delete_p0/test_segment_iterator_delete.groovy  | 18 ++++++++++
 5 files changed, 78 insertions(+), 20 deletions(-)

diff --git a/be/src/olap/block_column_predicate.h 
b/be/src/olap/block_column_predicate.h
index 219a92c3aa..c2830c223b 100644
--- a/be/src/olap/block_column_predicate.h
+++ b/be/src/olap/block_column_predicate.h
@@ -43,6 +43,9 @@ public:
 
     virtual void get_all_column_ids(std::set<ColumnId>& column_id_set) const = 
0;
 
+    virtual void get_all_column_predicate(
+            std::set<const ColumnPredicate*>& predicate_set) const = 0;
+
     virtual uint16_t evaluate(vectorized::MutableColumns& block, uint16_t* sel,
                               uint16_t selected_size) const {
         return selected_size;
@@ -68,6 +71,10 @@ public:
         column_id_set.insert(_predicate->column_id());
     };
 
+    void get_all_column_predicate(std::set<const ColumnPredicate*>& 
predicate_set) const override {
+        predicate_set.insert(_predicate);
+    }
+
     uint16_t evaluate(vectorized::MutableColumns& block, uint16_t* sel,
                       uint16_t selected_size) const override;
     void evaluate_and(vectorized::MutableColumns& block, uint16_t* sel, 
uint16_t selected_size,
@@ -103,6 +110,12 @@ public:
         }
     };
 
+    void get_all_column_predicate(std::set<const ColumnPredicate*>& 
predicate_set) const override {
+        for (auto child_block_predicate : _block_column_predicate_vec) {
+            child_block_predicate->get_all_column_predicate(predicate_set);
+        }
+    }
+
 protected:
     std::vector<const BlockColumnPredicate*> _block_column_predicate_vec;
 };
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp 
b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index cc728bdbe7..b246352b97 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -751,6 +751,16 @@ void SegmentIterator::_vec_init_lazy_materialization() {
     std::set<ColumnId> del_cond_id_set;
     _opts.delete_condition_predicates->get_all_column_ids(del_cond_id_set);
 
+    std::set<const ColumnPredicate*> delete_predicate_set {};
+    
_opts.delete_condition_predicates->get_all_column_predicate(delete_predicate_set);
+    for (const auto predicate : delete_predicate_set) {
+        if (PredicateTypeTraits::is_range(predicate->type())) {
+            _delete_range_column_ids.push_back(predicate->column_id());
+        } else if (PredicateTypeTraits::is_bloom_filter(predicate->type())) {
+            _delete_bloom_filter_column_ids.push_back(predicate->column_id());
+        }
+    }
+
     if (!_col_predicates.empty() || !del_cond_id_set.empty()) {
         std::set<ColumnId> short_cir_pred_col_id_set; // using set for 
distinct cid
         std::set<ColumnId> vec_pred_col_id_set;
@@ -1199,6 +1209,35 @@ Status SegmentIterator::next_batch(vectorized::Block* 
block) {
     return Status::OK();
 }
 
+void SegmentIterator::_convert_dict_code_for_predicate_if_necessary() {
+    for (auto predicate : _short_cir_eval_predicate) {
+        _convert_dict_code_for_predicate_if_necessary_impl(predicate);
+    }
+
+    for (auto predicate : _pre_eval_block_predicate) {
+        _convert_dict_code_for_predicate_if_necessary_impl(predicate);
+    }
+
+    for (auto column_id : _delete_range_column_ids) {
+        
_current_return_columns[column_id].get()->convert_dict_codes_if_necessary();
+    }
+
+    for (auto column_id : _delete_bloom_filter_column_ids) {
+        
_current_return_columns[column_id].get()->generate_hash_values_for_runtime_filter();
+    }
+}
+
+void SegmentIterator::_convert_dict_code_for_predicate_if_necessary_impl(
+        ColumnPredicate* predicate) {
+    auto& column = _current_return_columns[predicate->column_id()];
+    auto* col_ptr = column.get();
+    if (PredicateTypeTraits::is_range(predicate->type())) {
+        col_ptr->convert_dict_codes_if_necessary();
+    } else if (PredicateTypeTraits::is_bloom_filter(predicate->type())) {
+        col_ptr->generate_hash_values_for_runtime_filter();
+    }
+}
+
 void SegmentIterator::_update_max_row(const vectorized::Block* block) {
     _estimate_row_size = false;
     auto avg_row_size = block->bytes() / block->rows();
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.h 
b/be/src/olap/rowset/segment_v2/segment_iterator.h
index d57889ccbe..66e45db1cc 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.h
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.h
@@ -134,27 +134,9 @@ private:
     bool _can_evaluated_by_vectorized(ColumnPredicate* predicate);
 
     // Dictionary column should do something to initial.
-    void _convert_dict_code_for_predicate_if_necessary() {
-        for (auto predicate : _short_cir_eval_predicate) {
-            auto& column = _current_return_columns[predicate->column_id()];
-            auto* col_ptr = column.get();
-            if (PredicateTypeTraits::is_range(predicate->type())) {
-                col_ptr->convert_dict_codes_if_necessary();
-            } else if 
(PredicateTypeTraits::is_bloom_filter(predicate->type())) {
-                col_ptr->generate_hash_values_for_runtime_filter();
-            }
-        }
+    void _convert_dict_code_for_predicate_if_necessary();
 
-        for (auto predicate : _pre_eval_block_predicate) {
-            auto& column = _current_return_columns[predicate->column_id()];
-            auto* col_ptr = column.get();
-            if (PredicateTypeTraits::is_range(predicate->type())) {
-                col_ptr->convert_dict_codes_if_necessary();
-            } else if 
(PredicateTypeTraits::is_bloom_filter(predicate->type())) {
-                col_ptr->generate_hash_values_for_runtime_filter();
-            }
-        }
-    }
+    void _convert_dict_code_for_predicate_if_necessary_impl(ColumnPredicate* 
predicate);
 
     void _update_max_row(const vectorized::Block* block);
 
@@ -198,6 +180,8 @@ private:
     vectorized::MutableColumns _current_return_columns;
     std::vector<ColumnPredicate*> _pre_eval_block_predicate;
     std::vector<ColumnPredicate*> _short_cir_eval_predicate;
+    std::vector<uint32_t> _delete_range_column_ids;
+    std::vector<uint32_t> _delete_bloom_filter_column_ids;
     // when lazy materialization is enable, segmentIter need to read data at 
least twice
     // first, read predicate columns by various index
     // second, read non-predicate columns
diff --git a/regression-test/data/delete_p0/test_segment_iterator_delete.out 
b/regression-test/data/delete_p0/test_segment_iterator_delete.out
index 41a422e4f4..9dd338b435 100644
--- a/regression-test/data/delete_p0/test_segment_iterator_delete.out
+++ b/regression-test/data/delete_p0/test_segment_iterator_delete.out
@@ -77,3 +77,7 @@
 4      4       4
 6      6       6
 
+-- !sql --
+1      SSS     sk6S0
+1      ttt     zdges
+
diff --git 
a/regression-test/suites/delete_p0/test_segment_iterator_delete.groovy 
b/regression-test/suites/delete_p0/test_segment_iterator_delete.groovy
index 177299a447..1a357bbff4 100644
--- a/regression-test/suites/delete_p0/test_segment_iterator_delete.groovy
+++ b/regression-test/suites/delete_p0/test_segment_iterator_delete.groovy
@@ -17,12 +17,25 @@
 
 suite("test_segment_iterator_delete") {
     def tableName = "delete_regression_test_segment_iterator"
+    def tableName_dict = 
"delete_regression_test_segment_iterator_column_dictionary"
 
     // test duplicate key
     sql """ DROP TABLE IF EXISTS ${tableName} """
     sql """ CREATE TABLE ${tableName} (c1 int NOT NULL, c2 int NOT NULL , c3 
int not null ) ENGINE=OLAP DUPLICATE KEY(c1, c2) COMMENT "OLAP" DISTRIBUTED BY 
HASH(c3) BUCKETS 1 
     PROPERTIES ( "replication_num" = "1" );"""
 
+    sql """ DROP TABLE IF EXISTS ${tableName_dict} """
+    sql """
+    CREATE TABLE ${tableName_dict} (
+        `tinyint_key` tinyint(4) NOT NULL,
+        `char_50_key` char(50) NOT NULL,
+        `character_key` varchar(500) NOT NULL
+    ) ENGINE=OLAP
+    AGGREGATE KEY(`tinyint_key`, `char_50_key`, `character_key`)
+    DISTRIBUTED BY HASH(`tinyint_key`) BUCKETS 1
+    PROPERTIES ( "replication_num" = "1" );
+    """
+
     sql """INSERT INTO ${tableName} VALUES (1,1,1)"""
     sql """INSERT INTO ${tableName} VALUES (2,2,2)"""
     sql """INSERT INTO ${tableName} VALUES (3,3,3)"""
@@ -90,4 +103,9 @@ suite("test_segment_iterator_delete") {
     qt_sql """select /*+ SET_VAR(enable_vectorized_engine=true) */ * from 
${tableName};"""
 
     sql """drop table ${tableName} force"""
+
+    // delete ColumnDictionary
+    sql """INSERT INTO ${tableName_dict} VALUES(1, 'dddd', 'adgs'), (1, 'SSS', 
'sk6S0'), (1, 'ttt', 'zdges');"""
+    sql """delete from ${tableName_dict} where character_key < "sk6S0";"""
+    qt_sql """select /*+ SET_VAR(enable_vectorized_engine=true) */ * from 
${tableName_dict} order by tinyint_key, char_50_key;"""
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to