This is an automated email from the ASF dual-hosted git repository. wangbo pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
The following commit(s) were added to refs/heads/master by this push: new a9831f87f2 [refactor]refactor lazy materialized (#8834) a9831f87f2 is described below commit a9831f87f25aa1e5b2c55cfce141c510b10e2a42 Author: wangbo <wan...@apache.org> AuthorDate: Fri May 6 19:16:35 2022 +0800 [refactor]refactor lazy materialized (#8834) [refactor]refactor lazy materialized (#8834) --- be/src/exec/olap_scan_node.cpp | 2 +- be/src/exec/olap_scan_node.h | 2 +- be/src/exec/olap_scanner.cpp | 2 +- be/src/olap/column_predicate.h | 2 +- be/src/olap/null_predicate.cpp | 2 +- be/src/olap/olap_common.h | 2 +- be/src/olap/rowset/segment_v2/segment_iterator.cpp | 227 ++++++++++++--------- be/src/olap/rowset/segment_v2/segment_iterator.h | 3 +- .../storage/test_dup_tab_basic_int_nullable.groovy | 4 + .../test_dup_tab_basic_varchar_nullable.groovy | 5 + .../storage/test_dup_tab_char_nullable.groovy | 2 + .../storage/test_dup_tab_date_nullable.groovy | 4 + .../storage/test_dup_tab_datetime_nullable.groovy | 4 + .../storage/test_dup_tab_decimal_nullable.groovy | 4 + .../test_dup_tab_mixed_type_nullable.groovy | 3 + 15 files changed, 165 insertions(+), 103 deletions(-) diff --git a/be/src/exec/olap_scan_node.cpp b/be/src/exec/olap_scan_node.cpp index 61befd7eda..c0ec6f1150 100644 --- a/be/src/exec/olap_scan_node.cpp +++ b/be/src/exec/olap_scan_node.cpp @@ -126,7 +126,7 @@ void OlapScanNode::_init_counter(RuntimeState* state) { _rows_vec_cond_counter = ADD_COUNTER(_segment_profile, "RowsVectorPredFiltered", TUnit::UNIT); _vec_cond_timer = ADD_TIMER(_segment_profile, "VectorPredEvalTime"); _short_cond_timer = ADD_TIMER(_segment_profile, "ShortPredEvalTime"); - _pred_col_read_timer = ADD_TIMER(_segment_profile, "PredColumnReadTime"); + _first_read_timer = ADD_TIMER(_segment_profile, "FirstReadTime"); _lazy_read_timer = ADD_TIMER(_segment_profile, "LazyReadTime"); _output_col_timer = ADD_TIMER(_segment_profile, "OutputColumnTime"); diff --git a/be/src/exec/olap_scan_node.h 
b/be/src/exec/olap_scan_node.h index 688c4517a3..6313101b2b 100644 --- a/be/src/exec/olap_scan_node.h +++ b/be/src/exec/olap_scan_node.h @@ -290,7 +290,7 @@ protected: RuntimeProfile::Counter* _rows_vec_cond_counter = nullptr; RuntimeProfile::Counter* _vec_cond_timer = nullptr; RuntimeProfile::Counter* _short_cond_timer = nullptr; - RuntimeProfile::Counter* _pred_col_read_timer = nullptr; + RuntimeProfile::Counter* _first_read_timer = nullptr; RuntimeProfile::Counter* _lazy_read_timer = nullptr; RuntimeProfile::Counter* _output_col_timer = nullptr; diff --git a/be/src/exec/olap_scanner.cpp b/be/src/exec/olap_scanner.cpp index 2478d2b2bf..40029414b8 100644 --- a/be/src/exec/olap_scanner.cpp +++ b/be/src/exec/olap_scanner.cpp @@ -545,7 +545,7 @@ void OlapScanner::update_counter() { // COUNTER_UPDATE(_parent->_filtered_rows_counter, stats.num_rows_filtered); COUNTER_UPDATE(_parent->_vec_cond_timer, stats.vec_cond_ns); COUNTER_UPDATE(_parent->_short_cond_timer, stats.short_cond_ns); - COUNTER_UPDATE(_parent->_pred_col_read_timer, stats.pred_col_read_ns); + COUNTER_UPDATE(_parent->_first_read_timer, stats.first_read_ns); COUNTER_UPDATE(_parent->_lazy_read_timer, stats.lazy_read_ns); COUNTER_UPDATE(_parent->_output_col_timer, stats.output_col_ns); COUNTER_UPDATE(_parent->_rows_vec_cond_counter, stats.rows_vec_cond_filtered); diff --git a/be/src/olap/column_predicate.h b/be/src/olap/column_predicate.h index 7ebb4604f0..7aa3218314 100644 --- a/be/src/olap/column_predicate.h +++ b/be/src/olap/column_predicate.h @@ -43,7 +43,7 @@ enum class PredicateType { IN_LIST = 7, NOT_IN_LIST = 8, IS_NULL = 9, - NOT_IS_NULL = 10, + IS_NOT_NULL = 10, BF = 11, // BloomFilter }; diff --git a/be/src/olap/null_predicate.cpp b/be/src/olap/null_predicate.cpp index 43cfbcaab3..1210e1f439 100644 --- a/be/src/olap/null_predicate.cpp +++ b/be/src/olap/null_predicate.cpp @@ -30,7 +30,7 @@ NullPredicate::NullPredicate(uint32_t column_id, bool is_null, bool opposite) : ColumnPredicate(column_id), 
_is_null(opposite != is_null) {} PredicateType NullPredicate::type() const { - return _is_null ? PredicateType::IS_NULL : PredicateType::NOT_IS_NULL; + return _is_null ? PredicateType::IS_NULL : PredicateType::IS_NOT_NULL; } void NullPredicate::evaluate(VectorizedRowBatch* batch) const { diff --git a/be/src/olap/olap_common.h b/be/src/olap/olap_common.h index 3afa23f983..c8256d0f2e 100644 --- a/be/src/olap/olap_common.h +++ b/be/src/olap/olap_common.h @@ -265,7 +265,7 @@ struct OlapReaderStatistics { int64_t rows_vec_del_cond_filtered = 0; int64_t vec_cond_ns = 0; int64_t short_cond_ns = 0; - int64_t pred_col_read_ns = 0; + int64_t first_read_ns = 0; int64_t lazy_read_ns = 0; int64_t output_col_ns = 0; diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index 0fd4a16ee4..ec2093e2f8 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -523,7 +523,7 @@ Status SegmentIterator::next_batch(RowBlockV2* block) { // phase 1: read rows selected by various index (indicated by _row_bitmap) into block // when using lazy-materialization-read, only columns with predicates are read { - SCOPED_RAW_TIMER(&_opts.stats->pred_col_read_ns); + SCOPED_RAW_TIMER(&_opts.stats->first_read_ns); do { uint32_t range_from; uint32_t range_to; @@ -607,22 +607,45 @@ Status SegmentIterator::next_batch(RowBlockV2* block) { /* ---------------------- for vecterization implementation ---------------------- */ +/** + * For storage layer data type, can be measured from two perspectives: + * 1 Whether the type can be read in a fast way(batch read using SIMD) + * Such as integer type and float type, this type can be read in SIMD way. + * For the type string/bitmap/hll, they can not be read in batch way, so read this type data is slow. + * If a type can be read fast, we can try to eliminate Lazy Materialization, because we think for this type, seek cost > read cost. 
+ * This is an estimate, if we want more precise cost, statistics collection is necessary(this is a todo). + * In short, when returned non-pred columns contain string/hll/bitmap, we use Lazy Materialization. + * Otherwise, we disable it. + * + * When Lazy Materialization is enabled, we need to read column at least two times. + * First time to read Pred col, second time to read non-pred. + * Here's an interesting question to research, whether read Pred col once is the best plan. + * (why not read Pred col twice or more?) + * + * When Lazy Materialization is disabled, we just need to read once. + * + * + * 2 Whether the predicate type can be evaluated in a fast way(using SIMD to eval pred) + * Such as integer type and float type, they can be eval fast. + * But for BloomFilter/string/date, they eval slow. + * If a type can be eval fast, we use vectorization to eval it. + * Otherwise, we use short-circuit to eval it. + * + * + */ + // todo(wb) need a UT here void SegmentIterator::_vec_init_lazy_materialization() { _is_pred_column.resize(_schema.columns().size(), false); - // including short_cir_pred_col_id_set and vec_pred_col_id_set + // including short/vec/delete pred std::set<ColumnId> pred_column_ids; - _is_all_column_basic_type = true; - bool is_predicate_column_exists = false; - bool is_non_predicate_column_exists = false; + _lazy_materialization_read = false; std::set<ColumnId> del_cond_id_set; _opts.delete_condition_predicates->get_all_column_ids(del_cond_id_set); if (!_col_predicates.empty() || !del_cond_id_set.empty()) { - is_predicate_column_exists = true; - std::set<ColumnId> short_cir_pred_col_id_set; // using set for distinct cid std::set<ColumnId> vec_pred_col_id_set; @@ -632,13 +655,16 @@ void SegmentIterator::_vec_init_lazy_materialization() { _is_pred_column[cid] = true; pred_column_ids.insert(cid); + // Step1: check pred using short eval or vec eval if (type == OLAP_FIELD_TYPE_VARCHAR || type == OLAP_FIELD_TYPE_CHAR || type == OLAP_FIELD_TYPE_STRING || 
predicate->type() == PredicateType::BF || predicate->type() == PredicateType::IN_LIST || - predicate->type() == PredicateType::NOT_IN_LIST) { + predicate->type() == PredicateType::NOT_IN_LIST || + predicate->type() == PredicateType::IS_NULL || + predicate->type() == PredicateType::IS_NOT_NULL || type == OLAP_FIELD_TYPE_DATE || + type == OLAP_FIELD_TYPE_DECIMAL) { short_cir_pred_col_id_set.insert(cid); _short_cir_eval_predicate.push_back(predicate); - _is_all_column_basic_type = false; } else { vec_pred_col_id_set.insert(predicate->column_id()); if (_pre_eval_block_predicate == nullptr) { @@ -653,75 +679,84 @@ void SegmentIterator::_vec_init_lazy_materialization() { if (!del_cond_id_set.empty()) { short_cir_pred_col_id_set.insert(del_cond_id_set.begin(), del_cond_id_set.end()); pred_column_ids.insert(del_cond_id_set.begin(), del_cond_id_set.end()); - _is_all_column_basic_type = false; for (auto cid : del_cond_id_set) { _is_pred_column[cid] = true; } } - if (_schema.column_ids().size() > pred_column_ids.size()) { - for (auto cid : _schema.column_ids()) { - if (!_is_pred_column[cid]) { - _non_predicate_columns.push_back(cid); - is_non_predicate_column_exists = true; - - // todo(wb) make a cost-based lazy-materialization framework - // check non-pred column type to decide whether using lazy-materialization - FieldType type = _schema.column(cid)->type(); - if (_is_all_column_basic_type && - (type == OLAP_FIELD_TYPE_HLL || type == OLAP_FIELD_TYPE_OBJECT || - type == OLAP_FIELD_TYPE_VARCHAR || type == OLAP_FIELD_TYPE_CHAR || - type == OLAP_FIELD_TYPE_STRING)) { - _is_all_column_basic_type = false; - } - } - } - } - _vec_pred_column_ids.assign(vec_pred_col_id_set.cbegin(), vec_pred_col_id_set.cend()); _short_cir_pred_column_ids.assign(short_cir_pred_col_id_set.cbegin(), short_cir_pred_col_id_set.cend()); - } else { - _is_all_column_basic_type = false; - is_non_predicate_column_exists = true; + } + + if (!_vec_pred_column_ids.empty()) { + _is_need_vec_eval = true; + } + if 
(!_short_cir_pred_column_ids.empty()) { + _is_need_short_eval = true; + } + + // Step 2: check non-predicate read costs to determine whether need lazy materialization + // fill _non_predicate_columns. + // note(wb) For block schema, query layer and storage layer may have some diff + // query layer block schema not contains delete column, but storage layer appends delete column to end of block schema + // When output block to query layer, delete column can be skipped. + // _schema.column_ids() stands for storage layer block schema, so it contains delete columnid + // we just regard delete column as common pred column here. + if (_schema.column_ids().size() > pred_column_ids.size()) { for (auto cid : _schema.column_ids()) { - _non_predicate_columns.push_back(cid); + if (!_is_pred_column[cid]) { + _non_predicate_columns.push_back(cid); + FieldType type = _schema.column(cid)->type(); + + // todo(wb) maybe we can make read char type faster + // todo(wb) support map/array type + // todo(wb) consider multiple integer columns cost, such as 1000 columns, maybe lazy materialization faster + if (!_lazy_materialization_read && + (_is_need_vec_eval || + _is_need_short_eval) && // only when pred exists, we need to consider lazy materialization + (type == OLAP_FIELD_TYPE_HLL || type == OLAP_FIELD_TYPE_OBJECT || + type == OLAP_FIELD_TYPE_VARCHAR || type == OLAP_FIELD_TYPE_CHAR || + type == OLAP_FIELD_TYPE_STRING || type == OLAP_FIELD_TYPE_BOOL || + type == OLAP_FIELD_TYPE_DATE || type == OLAP_FIELD_TYPE_DATETIME || + type == OLAP_FIELD_TYPE_DECIMAL)) { + _lazy_materialization_read = true; + } + } } } - // note(wb) in following cases we disable lazy materialization - // case 1: when all column is basic type(is_all_column_basic_type = true) - // because we think `seek and read` cost > read page cost, lazy materialize may cause more `seek and read`, so disable it - // case 2: all column is predicate column - // case 3: all column is not predicate column - // todo(wb) need further 
research more lazy materialization rule, such as get more info from `statistics` for better decision - if (_is_all_column_basic_type) { - std::set<ColumnId> pred_set(_vec_pred_column_ids.begin(), _vec_pred_column_ids.end()); + // Step 3: fill column ids for read and output + if (_lazy_materialization_read) { + // insert pred cid to first_read_columns + for (auto cid : pred_column_ids) { + _first_read_column_ids.push_back(cid); + } + } else if (!_is_need_vec_eval && + !_is_need_short_eval) { // no pred exists, just read and output column + for (int i = 0; i < _schema.num_column_ids(); i++) { + auto cid = _schema.column_id(i); + _first_read_column_ids.push_back(cid); + } + } else { // pred exits, but we can eliminate lazy materialization + // insert pred/non-pred cid to first read columns + std::set<ColumnId> pred_id_set; + pred_id_set.insert(_short_cir_pred_column_ids.begin(), _short_cir_pred_column_ids.end()); + pred_id_set.insert(_vec_pred_column_ids.begin(), _vec_pred_column_ids.end()); std::set<ColumnId> non_pred_set(_non_predicate_columns.begin(), _non_predicate_columns.end()); - // when _is_all_column_basic_type = true, _first_read_column_ids should keep the same order with _schema.column_ids which stands for return column order for (int i = 0; i < _schema.num_column_ids(); i++) { auto cid = _schema.column_id(i); - if (pred_set.find(cid) != pred_set.end()) { + if (pred_id_set.find(cid) != pred_id_set.end()) { _first_read_column_ids.push_back(cid); } else if (non_pred_set.find(cid) != non_pred_set.end()) { _first_read_column_ids.push_back(cid); - // in this case, non-predicate column should also be filtered by sel idx, so we regard it as pred columns + // when _lazy_materialization_read = false, non-predicate column should also be filtered by sel idx, so we regard it as pred columns _is_pred_column[cid] = true; } } - - } else if (is_predicate_column_exists && !is_non_predicate_column_exists) { - _first_read_column_ids.assign(pred_column_ids.cbegin(), 
pred_column_ids.cend()); - } else if (!is_predicate_column_exists && is_non_predicate_column_exists) { - for (auto cid : _non_predicate_columns) { - _first_read_column_ids.push_back(cid); - } - } else { - _lazy_materialization_read = true; - _first_read_column_ids.assign(pred_column_ids.cbegin(), pred_column_ids.cend()); } // make _schema_block_id_map @@ -787,7 +822,7 @@ void SegmentIterator::_output_non_pred_columns(vectorized::Block* block) { Status SegmentIterator::_read_columns_by_index(uint32_t nrows_read_limit, uint32_t& nrows_read, bool set_block_rowid) { - SCOPED_RAW_TIMER(&_opts.stats->pred_col_read_ns); + SCOPED_RAW_TIMER(&_opts.stats->first_read_ns); do { uint32_t range_from; uint32_t range_to; @@ -818,7 +853,7 @@ Status SegmentIterator::_read_columns_by_index(uint32_t nrows_read_limit, uint32 void SegmentIterator::_evaluate_vectorization_predicate(uint16_t* sel_rowid_idx, uint16_t& selected_size) { SCOPED_RAW_TIMER(&_opts.stats->vec_cond_ns); - if (_vec_pred_column_ids.empty()) { + if (!_is_need_vec_eval) { for (uint32_t i = 0; i < selected_size; ++i) { sel_rowid_idx[i] = i; } @@ -859,7 +894,7 @@ void SegmentIterator::_evaluate_vectorization_predicate(uint16_t* sel_rowid_idx, void SegmentIterator::_evaluate_short_circuit_predicate(uint16_t* vec_sel_rowid_idx, uint16_t* selected_size_ptr) { SCOPED_RAW_TIMER(&_opts.stats->short_cond_ns); - if (_short_cir_pred_column_ids.empty()) { + if (!_is_need_short_eval) { return; } @@ -911,17 +946,19 @@ Status SegmentIterator::next_batch(vectorized::Block* block) { if (UNLIKELY(!_inited)) { RETURN_IF_ERROR(_init(true)); _inited = true; - if (!_vec_pred_column_ids.empty() || !_short_cir_pred_column_ids.empty()) { + if (_lazy_materialization_read) { _block_rowids.resize(_opts.block_row_max); } _current_return_columns.resize(_schema.columns().size()); - for (size_t i = 0; i < _schema.num_column_ids(); i++) { - auto cid = _schema.column_id(i); - if (_is_pred_column[cid]) { - auto column_desc = _schema.column(cid); - 
_current_return_columns[cid] = Schema::get_predicate_column_nullable_ptr( - column_desc->type(), column_desc->is_nullable()); - _current_return_columns[cid]->reserve(_opts.block_row_max); + if (_is_need_vec_eval || _is_need_short_eval) { + for (size_t i = 0; i < _schema.num_column_ids(); i++) { + auto cid = _schema.column_id(i); + if (_is_pred_column[cid]) { + auto column_desc = _schema.column(cid); + _current_return_columns[cid] = Schema::get_predicate_column_nullable_ptr( + column_desc->type(), column_desc->is_nullable()); + _current_return_columns[cid]->reserve(_opts.block_row_max); + } } } } @@ -947,48 +984,46 @@ Status SegmentIterator::next_batch(vectorized::Block* block) { return Status::EndOfFile("no more data in segment"); } - // when no predicate(include delete condition) is provided, output column directly - if (_vec_pred_column_ids.empty() && _short_cir_pred_column_ids.empty()) { + if (!_is_need_vec_eval && !_is_need_short_eval) { _output_non_pred_columns(block); - } else { // need predicate evaluation + } else { uint16_t selected_size = nrows_read; uint16_t sel_rowid_idx[selected_size]; // step 1: evaluate vectorization predicate _evaluate_vectorization_predicate(sel_rowid_idx, selected_size); - // When predicate column and no-predicate column are both basic type, lazy materialization is eliminate - // So output block directly after vectorization evaluation - if (_is_all_column_basic_type) { - RETURN_IF_ERROR(_output_column_by_sel_idx(block, _first_read_column_ids, sel_rowid_idx, - selected_size)); - } else { - // step 2: evaluate short ciruit predicate - // todo(wb) research whether need to read short predicate after vectorization evaluation - // to reduce cost of read short circuit columns. 
- // In SSB test, it make no difference; So need more scenarios to test - _evaluate_short_circuit_predicate(sel_rowid_idx, &selected_size); - - // step3: read non_predicate column - if (!_non_predicate_columns.empty()) { - _read_columns_by_rowids(_non_predicate_columns, _block_rowids, sel_rowid_idx, - selected_size, &_current_return_columns); + // step 2: evaluate short ciruit predicate + // todo(wb) research whether need to read short predicate after vectorization evaluation + // to reduce cost of read short circuit columns. + // In SSB test, it make no difference; So need more scenarios to test + _evaluate_short_circuit_predicate(sel_rowid_idx, &selected_size); + + if (!_lazy_materialization_read) { + Status ret = _output_column_by_sel_idx(block, _first_read_column_ids, sel_rowid_idx, + selected_size); + if (!ret.ok()) { + return ret; } + // shrink char_type suffix zero data + block->shrink_char_type_column_suffix_zero(_char_type_idx); + return ret; + } - // step4: output columns - // 4.1 output non-predicate column - _output_non_pred_columns(block); + // step3: read non_predicate column + _read_columns_by_rowids(_non_predicate_columns, _block_rowids, sel_rowid_idx, selected_size, + &_current_return_columns); - // 4.2 get union of short_cir_pred and vec_pred - std::set<ColumnId> pred_column_ids; - pred_column_ids.insert(_short_cir_pred_column_ids.begin(), - _short_cir_pred_column_ids.end()); - pred_column_ids.insert(_vec_pred_column_ids.begin(), _vec_pred_column_ids.end()); + // step4: output columns + // 4.1 output non-predicate column + _output_non_pred_columns(block); - // 4.3 output short circuit and predicate column - RETURN_IF_ERROR(_output_column_by_sel_idx(block, pred_column_ids, sel_rowid_idx, - selected_size)); - } + // 4.3 output short circuit and predicate column + // when lazy materialization enables, _first_read_column_ids = distinct(_short_cir_pred_column_ids + _vec_pred_column_ids) + // see _vec_init_lazy_materialization + // todo(wb) need to tell 
input columnids from output columnids + RETURN_IF_ERROR(_output_column_by_sel_idx(block, _first_read_column_ids, sel_rowid_idx, + selected_size)); } // shrink char_type suffix zero data diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.h b/be/src/olap/rowset/segment_v2/segment_iterator.h index 489d19b2a2..0ce9975456 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.h +++ b/be/src/olap/rowset/segment_v2/segment_iterator.h @@ -146,9 +146,10 @@ private: // remember the rowids we've read for the current row block. // could be a local variable of next_batch(), kept here to reuse vector memory std::vector<rowid_t> _block_rowids; + bool _is_need_vec_eval = false; + bool _is_need_short_eval = false; // fields for vectorization execution - bool _is_all_column_basic_type; std::vector<ColumnId> _vec_pred_column_ids; // keep columnId of columns for vectorized predicate evaluation std::vector<ColumnId> diff --git a/regression-test/suites/data_model/duplicate/storage/test_dup_tab_basic_int_nullable.groovy b/regression-test/suites/data_model/duplicate/storage/test_dup_tab_basic_int_nullable.groovy index 2a7f770efe..b3fb96ed1f 100644 --- a/regression-test/suites/data_model/duplicate/storage/test_dup_tab_basic_int_nullable.groovy +++ b/regression-test/suites/data_model/duplicate/storage/test_dup_tab_basic_int_nullable.groovy @@ -38,6 +38,8 @@ PROPERTIES ( ) """ + sql "set enable_vectorized_engine = false" + sql """insert into ${table1} values (9,10,11,12), (9,10,11,12), @@ -57,6 +59,8 @@ PROPERTIES ( (5,6,7,8) """ + sql "set enable_vectorized_engine = true" + test { // siteid column not contain null sql "select siteid,citycode,userid,pv from ${table1} where siteid = 21 " diff --git a/regression-test/suites/data_model/duplicate/storage/test_dup_tab_basic_varchar_nullable.groovy b/regression-test/suites/data_model/duplicate/storage/test_dup_tab_basic_varchar_nullable.groovy index 6dbe34a61a..1ab4590957 100644 --- 
a/regression-test/suites/data_model/duplicate/storage/test_dup_tab_basic_varchar_nullable.groovy +++ b/regression-test/suites/data_model/duplicate/storage/test_dup_tab_basic_varchar_nullable.groovy @@ -37,6 +37,9 @@ suite("test_dup_tab_basic_varchar_nullable") { "storage_format" = "V2" ) """ + + sql "set enable_vectorized_engine = false" + sql """insert into ${table1} values(null,'qie3','yy','lj'), (null,'hehe',null,'lala'), ('beijing','xuanwu','wugui',null), @@ -47,6 +50,8 @@ suite("test_dup_tab_basic_varchar_nullable") { ('tengxun2','qie',null,'lj') """ + sql "set enable_vectorized_engine = true" + // read single column test { sql "select city from ${table1} order by city" diff --git a/regression-test/suites/data_model/duplicate/storage/test_dup_tab_char_nullable.groovy b/regression-test/suites/data_model/duplicate/storage/test_dup_tab_char_nullable.groovy index e67c93d227..fbdb4b78be 100644 --- a/regression-test/suites/data_model/duplicate/storage/test_dup_tab_char_nullable.groovy +++ b/regression-test/suites/data_model/duplicate/storage/test_dup_tab_char_nullable.groovy @@ -38,6 +38,7 @@ PROPERTIES ( ) """ + sql "set enable_vectorized_engine = false" sql """insert into ${table1} values ('a1','a2','a3','a4'), @@ -48,6 +49,7 @@ PROPERTIES ( ('e1','e2','e3','e4'), (null,'e2',null,'e4') """ + sql "set enable_vectorized_engine = true" qt_read_single_column_1 "select city from ${table1} where city in ('a1','e1')" qt_read_single_column_2 "select city from ${table1} where city not in ('a1','e1')" diff --git a/regression-test/suites/data_model/duplicate/storage/test_dup_tab_date_nullable.groovy b/regression-test/suites/data_model/duplicate/storage/test_dup_tab_date_nullable.groovy index 122cfe2717..c9613fb6dc 100644 --- a/regression-test/suites/data_model/duplicate/storage/test_dup_tab_date_nullable.groovy +++ b/regression-test/suites/data_model/duplicate/storage/test_dup_tab_date_nullable.groovy @@ -39,6 +39,8 @@ PROPERTIES ( """ + sql "set enable_vectorized_engine = 
false" + sql """insert into ${table1} values (1, '2021-04-01', '2021-04-02', '2021-04-03'), (1, '2021-03-01', '2021-03-02', '2021-03-03'), @@ -47,6 +49,8 @@ PROPERTIES ( (null, '2021-05-01', 'null', '2021-04-03') """ + sql "set enable_vectorized_engine = true" + qt_sql1 "select date1 from ${table1} order by date1" // read single column diff --git a/regression-test/suites/data_model/duplicate/storage/test_dup_tab_datetime_nullable.groovy b/regression-test/suites/data_model/duplicate/storage/test_dup_tab_datetime_nullable.groovy index f295b5cec0..4833faee2b 100644 --- a/regression-test/suites/data_model/duplicate/storage/test_dup_tab_datetime_nullable.groovy +++ b/regression-test/suites/data_model/duplicate/storage/test_dup_tab_datetime_nullable.groovy @@ -39,6 +39,8 @@ PROPERTIES ( """ + sql "set enable_vectorized_engine = false" + sql """insert into ${table1} values (1,'2021-01-01 23:10:01','2021-01-02 23:10:04','2021-01-02 22:10:04'), (2,'2021-02-01 23:10:01','2021-02-02 23:10:04','2021-03-02 22:10:04'), @@ -48,6 +50,8 @@ PROPERTIES ( (null,'2021-06-01 23:10:01',null,'2021-06-02 22:10:04') """ + sql "set enable_vectorized_engine = true" + qt_read_single_column_1 "select datetime1 from ${table1}" qt_read_single_column_2 "select siteid from ${table1}" diff --git a/regression-test/suites/data_model/duplicate/storage/test_dup_tab_decimal_nullable.groovy b/regression-test/suites/data_model/duplicate/storage/test_dup_tab_decimal_nullable.groovy index 2fa2eb2386..a858ef7ea1 100644 --- a/regression-test/suites/data_model/duplicate/storage/test_dup_tab_decimal_nullable.groovy +++ b/regression-test/suites/data_model/duplicate/storage/test_dup_tab_decimal_nullable.groovy @@ -38,6 +38,8 @@ PROPERTIES ( ) """ + sql "set enable_vectorized_engine = false" + sql """insert into ${table1} values(1.1,1.2,1.3,1.4), (1.1,2.2,2.3,3.4), (2.1,2.2,2.3,2.4), @@ -46,6 +48,8 @@ PROPERTIES ( (null,2,null,4) """ + sql "set enable_vectorized_engine = true" + // query decimal test { sql "select 
siteid from ${table1} order by siteid" diff --git a/regression-test/suites/data_model/duplicate/storage/test_dup_tab_mixed_type_nullable.groovy b/regression-test/suites/data_model/duplicate/storage/test_dup_tab_mixed_type_nullable.groovy index 924ecc2c68..c729f8a62c 100644 --- a/regression-test/suites/data_model/duplicate/storage/test_dup_tab_mixed_type_nullable.groovy +++ b/regression-test/suites/data_model/duplicate/storage/test_dup_tab_mixed_type_nullable.groovy @@ -42,6 +42,8 @@ PROPERTIES ( ) """ + sql "set enable_vectorized_engine = false" + sql """insert into ${table1} values(1,2,3.1,4.2,5.3,5.4,'a1','a2'), (2,3,4.1,5.2,6.3,7.4,'b1','b2'), (3,4,5.1,6.2,7.3,8.4,'c1','c2'), @@ -50,6 +52,7 @@ PROPERTIES ( (5,6,5.1,8.2,6.3,11.4,'e1','e2'), (null,7,null,8,null,9,null,'e3') """ + sql "set enable_vectorized_engine = true" // read int and string test { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org