This is an automated email from the ASF dual-hosted git repository. lihaopeng pushed a commit to branch vectorized in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
commit e23e332f75f6d1b3891c9adcd7ba85f9a89f765e Author: wangbo <506340...@qq.com> AuthorDate: Fri Jan 14 11:44:27 2022 +0800 [Vectorization] Support SegmentIterator vectorization (#7613) --- be/src/olap/column_predicate.h | 2 + be/src/olap/comparison_predicate.cpp | 2 +- be/src/olap/in_list_predicate.cpp | 39 +++ be/src/olap/in_list_predicate.h | 7 +- be/src/olap/rowset/segment_v2/binary_dict_page.cpp | 21 +- be/src/olap/rowset/segment_v2/binary_plain_page.h | 34 +- be/src/olap/rowset/segment_v2/bitshuffle_page.h | 36 +- be/src/olap/rowset/segment_v2/segment_iterator.cpp | 362 ++++++++++++++++++++- be/src/olap/rowset/segment_v2/segment_iterator.h | 29 +- be/src/olap/schema.cpp | 66 ++++ be/src/olap/schema.h | 4 + be/src/vec/columns/column.h | 13 +- be/src/vec/columns/column_complex.h | 2 + be/src/vec/columns/column_nullable.cpp | 15 +- be/src/vec/columns/column_nullable.h | 6 + be/src/vec/columns/column_vector.h | 26 ++ be/src/vec/columns/predicate_column.h | 27 +- 17 files changed, 661 insertions(+), 30 deletions(-) diff --git a/be/src/olap/column_predicate.h b/be/src/olap/column_predicate.h index 10b8a91..6b1aa23 100644 --- a/be/src/olap/column_predicate.h +++ b/be/src/olap/column_predicate.h @@ -69,6 +69,8 @@ public: virtual void evaluate_vec(vectorized::IColumn& column, uint16_t size, bool* flags) const {}; uint32_t column_id() const { return _column_id; } + virtual bool is_in_predicate() { return false; } + protected: uint32_t _column_id; bool _opposite; diff --git a/be/src/olap/comparison_predicate.cpp b/be/src/olap/comparison_predicate.cpp index 598e7f3..a154a04 100644 --- a/be/src/olap/comparison_predicate.cpp +++ b/be/src/olap/comparison_predicate.cpp @@ -188,7 +188,7 @@ COMPARISON_PRED_COLUMN_EVALUATE(GreaterEqualPredicate, >=) void CLASS<type>::evaluate_vec(vectorized::IColumn& column, uint16_t size, bool* flags) const { \ if (column.is_nullable()) { \ auto* nullable_column = vectorized::check_and_get_column<vectorized::ColumnNullable>(column); \ - auto& 
data_array = reinterpret_cast<const vectorized::ColumnVector<type>&>(nullable_column->get_nested_column()).get_data(); \ + auto& data_array = reinterpret_cast<const vectorized::PredicateColumnType<type>&>(nullable_column->get_nested_column()).get_data(); \ auto& null_bitmap = reinterpret_cast<const vectorized::ColumnVector<uint8_t>&>(*(nullable_column->get_null_map_column_ptr())).get_data(); \ for (uint16_t i = 0; i < size; i++) { \ flags[i] = (data_array[i] OP _value) && (!null_bitmap[i]); \ diff --git a/be/src/olap/in_list_predicate.cpp b/be/src/olap/in_list_predicate.cpp index c167a17..a17e157 100644 --- a/be/src/olap/in_list_predicate.cpp +++ b/be/src/olap/in_list_predicate.cpp @@ -20,6 +20,8 @@ #include "olap/field.h" #include "runtime/string_value.hpp" #include "runtime/vectorized_row_batch.h" +#include "vec/columns/predicate_column.h" +#include "vec/columns/column_nullable.h" namespace doris { @@ -115,6 +117,43 @@ IN_LIST_PRED_EVALUATE(NotInListPredicate, ==) IN_LIST_PRED_COLUMN_BLOCK_EVALUATE(InListPredicate, !=) IN_LIST_PRED_COLUMN_BLOCK_EVALUATE(NotInListPredicate, ==) +#define IN_LIST_PRED_COLUMN_EVALUATE(CLASS, OP) \ + template <class type> \ + void CLASS<type>::evaluate(vectorized::IColumn& column, uint16_t* sel, uint16_t* size) const { \ + uint16_t new_size = 0; \ + if (column.is_nullable()) { \ + auto* nullable_column = \ + vectorized::check_and_get_column<vectorized::ColumnNullable>(column); \ + auto& null_bitmap = reinterpret_cast<const vectorized::ColumnVector<uint8_t>&>(*( \ + nullable_column->get_null_map_column_ptr())).get_data(); \ + auto* nest_column_vector = vectorized::check_and_get_column \ + <vectorized::PredicateColumnType<type>>(nullable_column->get_nested_column()); \ + auto& data_array = nest_column_vector->get_data(); \ + for (uint16_t i = 0; i < *size; i++) { \ + uint16_t idx = sel[i]; \ + sel[new_size] = idx; \ + const type& cell_value = reinterpret_cast<const type&>(data_array[idx]); \ + bool ret = !null_bitmap[idx] && 
(_values.find(cell_value) OP _values.end()); \ + new_size += _opposite ? !ret : ret; \ + } \ + *size = new_size; \ + } else { \ + auto& number_column = reinterpret_cast<vectorized::PredicateColumnType<type>&>(column);\ + auto& data_array = number_column.get_data(); \ + for (uint16_t i = 0; i < *size; i++) { \ + uint16_t idx = sel[i]; \ + sel[new_size] = idx; \ + const type& cell_value = reinterpret_cast<const type&>(data_array[idx]); \ + auto result = (_values.find(cell_value) OP _values.end()); \ + new_size += _opposite ? !result : result; \ + } \ + } \ + *size = new_size; \ + } + +IN_LIST_PRED_COLUMN_EVALUATE(InListPredicate, !=) +IN_LIST_PRED_COLUMN_EVALUATE(NotInListPredicate, ==) + #define IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_OR(CLASS, OP) \ template <class type> \ void CLASS<type>::evaluate_or(ColumnBlock* block, uint16_t* sel, uint16_t size, bool* flags) \ diff --git a/be/src/olap/in_list_predicate.h b/be/src/olap/in_list_predicate.h index cf9bf61..7cd237b 100644 --- a/be/src/olap/in_list_predicate.h +++ b/be/src/olap/in_list_predicate.h @@ -76,6 +76,8 @@ namespace doris { class VectorizedRowBatch; +// todo(wb) support evaluate_and,evaluate_or + #define IN_LIST_PRED_CLASS_DEFINE(CLASS) \ template <class type> \ class CLASS : public ColumnPredicate { \ @@ -90,7 +92,10 @@ class VectorizedRowBatch; virtual Status evaluate(const Schema& schema, \ const std::vector<BitmapIndexIterator*>& iterators, \ uint32_t num_rows, roaring::Roaring* bitmap) const override; \ - \ + void evaluate(vectorized::IColumn& column, uint16_t* sel, uint16_t* size) const override; \ + void evaluate_and(vectorized::IColumn& column, uint16_t* sel, uint16_t size, bool* flags) const override {} \ + void evaluate_or(vectorized::IColumn& column, uint16_t* sel, uint16_t size, bool* flags) const override {} \ + bool is_in_predicate() override { return true; } \ private: \ phmap::flat_hash_set<type> _values; \ }; diff --git a/be/src/olap/rowset/segment_v2/binary_dict_page.cpp 
b/be/src/olap/rowset/segment_v2/binary_dict_page.cpp index 7825dcc..edd64db 100644 --- a/be/src/olap/rowset/segment_v2/binary_dict_page.cpp +++ b/be/src/olap/rowset/segment_v2/binary_dict_page.cpp @@ -257,15 +257,26 @@ Status BinaryDictPageDecoder::next_batch(size_t* n, vectorized::MutableColumnPtr const int32_t* data_array = reinterpret_cast<const int32_t*>(_bit_shuffle_ptr->_chunk.data); size_t start_index = _bit_shuffle_ptr->_cur_index; - // todo(wb) support nullable - if (dst->is_predicate_column()) { + auto* dst_col_ptr = dst.get(); + if (dst->is_nullable()) { + auto nullable_column = assert_cast<vectorized::ColumnNullable*>(dst.get()); + dst_col_ptr = nullable_column->get_nested_column_ptr().get(); + + // fill null bitmap here, not null; + // todo(wb) using SIMD speed up here + for (int i = 0; i < max_fetch; i++) { + nullable_column->get_null_map_data().push_back(0); + } + } + + if (dst_col_ptr->is_predicate_column()) { // cast columnptr to columnstringvalue just for avoid virtual function call overhead - vectorized::ColumnStringValue& string_value_vector = reinterpret_cast<vectorized::ColumnStringValue&>(*dst); + auto* string_value_column_ptr = reinterpret_cast<vectorized::ColumnStringValue*>(dst_col_ptr); for (int i = 0; i < max_fetch; i++, start_index++) { int32_t codeword = data_array[start_index]; uint32_t start_offset = _start_offset_array[codeword]; uint32_t str_len = _len_array[codeword]; - string_value_vector.insert_data(&_dict_decoder->_data[start_offset], str_len); + string_value_column_ptr->insert_data(&_dict_decoder->_data[start_offset], str_len); } } else { // todo(wb) research whether using batch memcpy to insert columnString can has better performance when data set is big @@ -273,7 +284,7 @@ Status BinaryDictPageDecoder::next_batch(size_t* n, vectorized::MutableColumnPtr int32_t codeword = data_array[start_index]; const uint32_t start_offset = _start_offset_array[codeword]; const uint32_t str_len = _len_array[codeword]; - 
dst->insert_data(&_dict_decoder->_data[start_offset], str_len); + dst_col_ptr->insert_data(&_dict_decoder->_data[start_offset], str_len); } } _bit_shuffle_ptr->_cur_index += max_fetch; diff --git a/be/src/olap/rowset/segment_v2/binary_plain_page.h b/be/src/olap/rowset/segment_v2/binary_plain_page.h index 25ce9e0..3c55d2b 100644 --- a/be/src/olap/rowset/segment_v2/binary_plain_page.h +++ b/be/src/olap/rowset/segment_v2/binary_plain_page.h @@ -38,6 +38,8 @@ #include "runtime/mem_pool.h" #include "util/coding.h" #include "util/faststring.h" +#include "vec/columns/column_complex.h" +#include "vec/columns/column_nullable.h" namespace doris { namespace segment_v2 { @@ -235,20 +237,46 @@ public: } const size_t max_fetch = std::min(*n, static_cast<size_t>(_num_elems - _cur_idx)); - if (dst->is_predicate_column()) { + auto* dst_col_ptr = dst.get(); + if (dst->is_nullable()) { + auto nullable_column = assert_cast<vectorized::ColumnNullable*>(dst.get()); + dst_col_ptr = nullable_column->get_nested_column_ptr().get(); + // fill null bitmap here, not null; + for (int i = 0; i < max_fetch; i++) { + nullable_column->get_null_map_data().push_back(0); + } + } + + if (dst_col_ptr->is_bitmap()) { + auto& bitmap_column = reinterpret_cast<vectorized::ColumnBitmap&>(*dst_col_ptr); + for (size_t i = 0; i < max_fetch; i++, _cur_idx++) { + const uint32_t start_offset = offset(_cur_idx); + uint32_t len = offset(_cur_idx + 1) - start_offset; + + bitmap_column.insert_default(); + BitmapValue* pvalue = &bitmap_column.get_element(bitmap_column.size() - 1); + if (len != 0) { + BitmapValue value; + value.deserialize(&_data[start_offset]); + *pvalue = std::move(value); + } else { + *pvalue = std::move(*reinterpret_cast<BitmapValue*>(const_cast<char*>(&_data[start_offset]))); + } + } + } else if (dst_col_ptr->is_predicate_column()) { // todo(wb) padding sv here for better comparison performance for (size_t i = 0; i < max_fetch; i++, _cur_idx++) { const uint32_t start_offset = offset(_cur_idx); 
uint32_t len = offset(_cur_idx + 1) - start_offset; StringValue sv(const_cast<char*>(&_data[start_offset]), len); - dst->insert_data(reinterpret_cast<char*>(&sv), 0); + dst_col_ptr->insert_data(reinterpret_cast<char*>(&sv), 0); } } else { for (size_t i = 0; i < max_fetch; i++, _cur_idx++) { // todo(wb) need more test case and then improve here const uint32_t start_offset = offset(_cur_idx); uint32_t len = offset(_cur_idx + 1) - start_offset; - dst->insert_data(&_data[start_offset], len); + dst_col_ptr->insert_data(&_data[start_offset], len); } } diff --git a/be/src/olap/rowset/segment_v2/bitshuffle_page.h b/be/src/olap/rowset/segment_v2/bitshuffle_page.h index 5afbb7d..83eebae 100644 --- a/be/src/olap/rowset/segment_v2/bitshuffle_page.h +++ b/be/src/olap/rowset/segment_v2/bitshuffle_page.h @@ -37,6 +37,8 @@ #include "util/coding.h" #include "util/faststring.h" #include "util/slice.h" +#include "vec/runtime/vdatetime_value.h" +#include "vec/columns/column_nullable.h" namespace doris { namespace segment_v2 { @@ -359,17 +361,28 @@ public: int begin = _cur_index; int end = _cur_index + max_fetch; - + + auto* dst_col_ptr = dst.get(); + if (dst->is_nullable()) { + auto nullable_column = assert_cast<vectorized::ColumnNullable*>(dst.get()); + dst_col_ptr = nullable_column->get_nested_column_ptr().get(); + + // fill null bitmap here, not null; + for (int j = begin; j < end; j++) { + nullable_column->get_null_map_data().push_back(0); + } + } + // todo(wb) Try to eliminate type judgment in pagedecoder - if (dst->is_column_decimal()) { // decimal non-predicate column + if (dst_col_ptr->is_column_decimal()) { // decimal non-predicate column for (; begin < end; begin++) { const char* cur_ptr = (const char*)&_chunk.data[begin * SIZE_OF_TYPE]; int64_t int_value = *(int64_t*)(cur_ptr); int32_t frac_value = *(int32_t*)(cur_ptr + sizeof(int64_t)); DecimalV2Value data(int_value, frac_value); - dst->insert_data(reinterpret_cast<char*>(&data), 0); + 
dst_col_ptr->insert_data(reinterpret_cast<char*>(&data), 0); } - } else if (dst->is_date_type()) { + } else if (dst_col_ptr->is_date_type()) { for (; begin < end; begin++) { const char* cur_ptr = (const char*)&_chunk.data[begin * SIZE_OF_TYPE]; uint64_t value = 0; @@ -378,14 +391,21 @@ public: value |= *(unsigned char*)(cur_ptr + 1); value <<= 8; value |= *(unsigned char*)(cur_ptr); - DateTimeValue date; + vectorized::VecDateTimeValue date; date.from_olap_date(value); - dst->insert_data(reinterpret_cast<char*>(&date), 0); + dst_col_ptr->insert_data(reinterpret_cast<char*>(&date), 0); + } + } else if (dst_col_ptr->is_datetime_type()) { + for (; begin < end; begin++) { + const char* cur_ptr = (const char*)&_chunk.data[begin * SIZE_OF_TYPE]; + uint64_t value = *reinterpret_cast<const uint64_t*>(cur_ptr); + vectorized::VecDateTimeValue date(value); + dst_col_ptr->insert_data(reinterpret_cast<char*>(&date), 0); } } else { - // todo(wb) need performance test here, may be batch memory copy? + // todo(wb) batch insert here for (; begin < end; begin++) { - dst->insert_data((const char*)&_chunk.data[begin * SIZE_OF_TYPE], 0); + dst_col_ptr->insert_data((const char*)&_chunk.data[begin * SIZE_OF_TYPE], 0); } } diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index d19f56e..86a89ac 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -30,6 +30,7 @@ #include "olap/rowset/segment_v2/segment.h" #include "olap/short_key_index.h" #include "util/doris_metrics.h" +#include "olap/in_list_predicate.h" using strings::Substitute; @@ -120,7 +121,7 @@ Status SegmentIterator::init(const StorageReadOptions& opts) { return Status::OK(); } -Status SegmentIterator::_init() { +Status SegmentIterator::_init(bool is_vec) { DorisMetrics::instance()->segment_read_total->increment(1); // get file handle from file descriptor of segment fs::BlockManager* block_mgr = 
fs::fs_util::block_manager(_segment->_path_desc.storage_medium); @@ -133,7 +134,11 @@ Status SegmentIterator::_init() { RETURN_IF_ERROR(_get_row_ranges_by_keys()); } RETURN_IF_ERROR(_get_row_ranges_by_column_conditions()); - _init_lazy_materialization(); + if (is_vec) { + _vec_init_lazy_materialization(); + } else { + _init_lazy_materialization(); + } _range_iter.reset(new BitmapRangeIterator(_row_bitmap)); return Status::OK(); } @@ -581,9 +586,358 @@ Status SegmentIterator::next_batch(RowBlockV2* block) { return Status::OK(); } +/* ---------------------- for vecterization implementation ---------------------- */ + +// todo(wb) need a UT here +void SegmentIterator::_vec_init_lazy_materialization() { + _is_pred_column.resize(_schema.columns().size(), false); + + std::set<ColumnId> pred_column_ids; // including short_cir_pred_col_id_set and vec_pred_col_id_set + _is_all_column_basic_type = true; + bool is_predicate_column_exists = false; + bool is_non_predicate_column_exists = false; + + if (!_col_predicates.empty()) { + is_predicate_column_exists = true; + + std::set<ColumnId> short_cir_pred_col_id_set; // using set for distinct cid + std::set<ColumnId> vec_pred_col_id_set; + + for (auto predicate : _col_predicates) { + auto cid = predicate->column_id(); + FieldType type = _schema.column(cid)->type(); + _is_pred_column[cid] = true; + pred_column_ids.insert(cid); + + // for date type which can not be executed in a vectorized way, using short circuit execution + if (type == OLAP_FIELD_TYPE_VARCHAR || type == OLAP_FIELD_TYPE_CHAR || type == OLAP_FIELD_TYPE_DECIMAL + || type == OLAP_FIELD_TYPE_DATE || predicate->is_in_predicate()) { + short_cir_pred_col_id_set.insert(cid); + _short_cir_eval_predicate.push_back(predicate); + _is_all_column_basic_type = false; + } else { + vec_pred_col_id_set.insert(predicate->column_id()); + if (_pre_eval_block_predicate == nullptr) { + _pre_eval_block_predicate = new AndBlockColumnPredicate(); + } + 
reinterpret_cast<MutilColumnBlockPredicate*>(_pre_eval_block_predicate)->add_column_predicate(new SingleColumnBlockPredicate(predicate)); + } + } + + std::set<ColumnId> del_cond_id_set; + _opts.delete_condition_predicates.get()->get_all_column_ids(del_cond_id_set); + short_cir_pred_col_id_set.insert(del_cond_id_set.begin(), del_cond_id_set.end()); + pred_column_ids.insert(del_cond_id_set.begin(), del_cond_id_set.end()); + + if (_schema.column_ids().size() > pred_column_ids.size()) { + for (auto cid : _schema.column_ids()) { + if (!_is_pred_column[cid]) { + _non_predicate_columns.push_back(cid); + is_non_predicate_column_exists = true; + } + } + } + + _vec_pred_column_ids.assign(vec_pred_col_id_set.cbegin(), vec_pred_col_id_set.cend()); + _short_cir_pred_column_ids.assign(short_cir_pred_col_id_set.cbegin(), short_cir_pred_col_id_set.cend()); + } else { + _is_all_column_basic_type = false; + is_non_predicate_column_exists = true; + for (auto cid : _schema.column_ids()) { + _non_predicate_columns.push_back(cid); + } + } + + // note(wb) in following cases we disable lazy materialization + // case 1: when all column is basic type(is_all_column_basic_type = true) + // because we think `seek and read` cost > read page cost, lazy materialize may cause more `seek and read`, so disable it + // case 2: all column is predicate column + // case 3: all column is not predicate column + // todo(wb) need further research more lazy materialization rule, such as get more info from `statistics` for better decision + if (_is_all_column_basic_type) { + std::set<ColumnId> pred_set(_vec_pred_column_ids.begin(), _vec_pred_column_ids.end()); + std::set<ColumnId> non_pred_set(_non_predicate_columns.begin(), _non_predicate_columns.end()); + + // when _is_all_column_basic_type = true, _first_read_column_ids should keep the same order with _schema.column_ids which stands for return column order + for (int i = 0; i < _schema.num_column_ids(); i++) { + auto cid = _schema.column_ids()[i]; + if 
(pred_set.find(cid) != pred_set.end()) { + _first_read_column_ids.push_back(cid); + } else if (non_pred_set.find(cid) != non_pred_set.end()) { + _first_read_column_ids.push_back(cid); + _is_pred_column[cid] = true; // in this case, non-predicate column should also be filtered by sel idx, so we regard it as pred columns + } + } + + } else if (is_predicate_column_exists && !is_non_predicate_column_exists) { + _first_read_column_ids.assign(pred_column_ids.cbegin(), pred_column_ids.cend()); + } else if (!is_predicate_column_exists && is_non_predicate_column_exists) { + for (auto cid : _non_predicate_columns) { + _first_read_column_ids.push_back(cid); + } + } else { + _lazy_materialization_read = true; + _first_read_column_ids.assign(pred_column_ids.cbegin(), pred_column_ids.cend()); + } + + // make _schema_block_id_map + _schema_block_id_map.resize(_schema.columns().size()); + for (int i = 0; i < _schema.num_column_ids(); i++) { + auto cid = _schema.column_ids()[i]; + _schema_block_id_map[cid] = i; + } + +} + +Status SegmentIterator::_read_columns(const std::vector<ColumnId>& column_ids, vectorized::MutableColumns& column_block, size_t nrows) { + for (auto cid : column_ids) { + auto& column = column_block[cid]; + size_t rows_read = nrows; + RETURN_IF_ERROR(_column_iterators[cid]->next_batch(&rows_read, column)); + DCHECK_EQ(nrows, rows_read); + } + return Status::OK(); +} + +void SegmentIterator::_init_current_block(vectorized::Block* block, std::vector<vectorized::MutableColumnPtr>& current_columns) { + bool is_block_mem_reuse= block->mem_reuse(); + if (is_block_mem_reuse) { + size_t column_to_keep = _schema.num_column_ids(); + for (int i = block->columns() - 1; i >= column_to_keep; i--) { + block->erase(i); + } + block->clear_column_data(); + } else { // pre fill output block here + for (size_t i = 0; i < _schema.num_column_ids(); i++) { + auto cid = _schema.column_ids()[i]; + auto* column_desc = _schema.columns()[cid]; + auto data_type = 
Schema::get_data_type_ptr(column_desc->type()); + if (column_desc->is_nullable()) { + block->insert({nullptr, std::make_shared<vectorized::DataTypeNullable>(std::move(data_type)), column_desc->name()}); + } else { + block->insert({nullptr, std::move(data_type), column_desc->name()}); + } + } + } + + for (size_t i = 0; i < _schema.num_column_ids(); i++) { + auto cid = _schema.column_ids()[i]; + if (_is_pred_column[cid]) { //todo(wb) maybe we can relase it after output block + current_columns[cid]->clear(); + } else { // non-predicate column + auto &column_desc = _schema.columns()[cid]; + if (is_block_mem_reuse) { + current_columns[cid] = std::move(*block->get_by_position(i).column).mutate(); + } else { + auto data_type = Schema::get_data_type_ptr(column_desc->type()); + if (column_desc->is_nullable()) { + current_columns[cid] = doris::vectorized::ColumnNullable::create( + std::move(data_type->create_column()), doris::vectorized::ColumnUInt8::create()); + } else { + current_columns[cid] = data_type->create_column(); + } + } + if (column_desc->type() == OLAP_FIELD_TYPE_DATE) { + current_columns[cid]->set_date_type(); + } else if (column_desc->type() == OLAP_FIELD_TYPE_DATETIME) { + current_columns[cid]->set_datetime_type(); + } + } + } +} + +void SegmentIterator::_output_non_pred_columns(vectorized::Block* block, bool is_block_mem_reuse) { + for (auto cid : _non_predicate_columns) { + block->replace_by_position(_schema_block_id_map[cid], std::move(_current_return_columns[cid])); + } + } + +void SegmentIterator::_output_column_by_sel_idx(vectorized::Block* block, std::vector<ColumnId> columnIds, + uint16_t* sel_rowid_idx, uint16_t select_size, bool is_block_mem_reuse) { + for (auto cid : columnIds) { + auto &column_ptr = _current_return_columns[cid]; + if (is_block_mem_reuse) { + column_ptr->filter_by_selector(sel_rowid_idx, select_size, + &block->get_by_position(_schema_block_id_map[cid]).column); + } else { + block->replace_by_position(_schema_block_id_map[cid], + 
(*column_ptr).get_ptr()->filter_by_selector(sel_rowid_idx, select_size)); + } + } + } + + +Status SegmentIterator::_read_columns_by_index(uint32_t nrows_read_limit, uint32_t& nrows_read, bool set_block_rowid) { + do { + uint32_t range_from; + uint32_t range_to; + bool has_next_range = + _range_iter->next_range(nrows_read_limit - nrows_read, &range_from, &range_to); + if (!has_next_range) { + break; + } + if (_cur_rowid == 0 || _cur_rowid != range_from) { + _cur_rowid = range_from; + RETURN_IF_ERROR(_seek_columns(_first_read_column_ids, _cur_rowid)); + } + size_t rows_to_read = range_to - range_from; + RETURN_IF_ERROR(_read_columns(_first_read_column_ids, _current_return_columns, rows_to_read)); + _cur_rowid += rows_to_read; + if (set_block_rowid) { + for (uint32_t rid = range_from; rid < range_to; rid++) { + _block_rowids[nrows_read++] = rid; + } + } else { + nrows_read += rows_to_read; + } + } while (nrows_read < nrows_read_limit); + return Status::OK(); +} + +void SegmentIterator::_evaluate_vectorization_predicate(uint16_t* sel_rowid_idx, uint16_t& selected_size) { + uint16_t new_size = 0; + if (_vec_pred_column_ids.size() == 0) { + for (uint32_t i = 0; i < selected_size; ++i) { + sel_rowid_idx[new_size++] = i; + } + return; + } + + uint16_t original_size = selected_size; + bool ret_flags[selected_size]; + memset(ret_flags, 1, selected_size); + _pre_eval_block_predicate->evaluate_vec(_current_return_columns, selected_size, ret_flags); + + for (uint32_t i = 0; i < selected_size; ++i) { + if (ret_flags[i]) { + sel_rowid_idx[new_size++] = i; + } + } + + _opts.stats->rows_vec_cond_filtered += original_size - new_size; + selected_size = new_size; +} + +void SegmentIterator::_evaluate_short_circuit_predicate(uint16_t* vec_sel_rowid_idx, uint16_t* selected_size_ptr) { + if (_short_cir_pred_column_ids.size() == 0) { + return; + } + + for (auto column_predicate : _short_cir_eval_predicate) { + auto column_id = column_predicate->column_id(); + auto& short_cir_column = 
_current_return_columns[column_id]; + column_predicate->evaluate(*short_cir_column, vec_sel_rowid_idx, selected_size_ptr); + } + + // evaluate delete condition + _opts.delete_condition_predicates->evaluate(_current_return_columns, vec_sel_rowid_idx, selected_size_ptr); +} + +void SegmentIterator::_read_columns_by_rowids(std::vector<ColumnId>& read_column_ids, std::vector<rowid_t>& rowid_vector, + uint16_t* sel_rowid_idx, size_t select_size, vectorized::MutableColumns* mutable_columns) { + size_t start_idx = 0; + while (start_idx < select_size) { + size_t end_idx = start_idx + 1; + while (end_idx < select_size && (rowid_vector[sel_rowid_idx[end_idx - 1]] == rowid_vector[sel_rowid_idx[end_idx]] - 1)) { + end_idx++; + } + size_t range = end_idx - start_idx; + _seek_columns(read_column_ids, rowid_vector[sel_rowid_idx[start_idx]]); + _read_columns(read_column_ids, *mutable_columns, range); + start_idx += range; + } +} + Status SegmentIterator::next_batch(vectorized::Block* block) { - //TODO - return Status::NotSupported("not implement now"); + bool is_mem_reuse = block->mem_reuse(); + SCOPED_RAW_TIMER(&_opts.stats->block_load_ns); + if (UNLIKELY(!_inited)) { + RETURN_IF_ERROR(_init(true)); + _inited = true; + if (_vec_pred_column_ids.size() > 0 || _short_cir_pred_column_ids.size() > 0) { + _block_rowids.reserve(_opts.block_row_max); + } + _current_return_columns.resize(_schema.columns().size()); + for (size_t i = 0; i < _schema.num_column_ids(); i++) { + auto cid = _schema.column_ids()[i]; + if (_is_pred_column[cid]) { + auto& column_desc = _schema.columns()[cid]; + _current_return_columns[cid] = Schema::get_predicate_column_nullable_ptr(column_desc->type(), column_desc->is_nullable()); + _current_return_columns[cid]->reserve(_opts.block_row_max); + } + } + } + + _init_current_block(block, _current_return_columns); + + uint32_t nrows_read = 0; + uint32_t nrows_read_limit = _opts.block_row_max; + _read_columns_by_index(nrows_read_limit, nrows_read, _col_predicates.size() 
> 0); + + _opts.stats->blocks_load += 1; + _opts.stats->raw_rows_read += nrows_read; + + if (nrows_read == 0) { + for (int i = 0; i < _schema.num_column_ids(); i++) { + auto cid = _schema.column_ids()[i]; + // todo(wb) abstract make column where + if (!_is_pred_column[cid]) { // non-predicate + block->replace_by_position(i, std::move(_current_return_columns[cid])); + } else { // predicate + if (!is_mem_reuse) { + auto* column_desc = _schema.columns()[cid]; + auto data_type = Schema::get_data_type_ptr(column_desc->type()); + block->replace_by_position(i, data_type->create_column()); + } + } + // not sure whether block is clear before enter segmentIter, so clear it here. + if (is_mem_reuse) { + block->clear_column_data(); + } + } + return Status::EndOfFile("no more data in segment"); + } + + // when no predicate(include delete condition) is provided, output column directly + if (_vec_pred_column_ids.size() == 0 && _short_cir_pred_column_ids.size() == 0) { + _output_non_pred_columns(block, is_mem_reuse); + } else { // need predicate evaluation + uint16_t selected_size = nrows_read; + uint16_t sel_rowid_idx[selected_size]; + + // step 1: evaluate vectorization predicate + _evaluate_vectorization_predicate(sel_rowid_idx, selected_size); + + // When predicate column and no-predicate column are both basic type, lazy materialization is eliminate + // So output block directly after vecorization evaluation + if (_is_all_column_basic_type) { + _output_column_by_sel_idx(block, _first_read_column_ids, sel_rowid_idx, selected_size, is_mem_reuse); + return Status::OK(); + } + + // step 2: evaluate short ciruit predicate + // todo(wb) research whether need to read short predicate after vectorization evaluation + // to reduce cost of read short circuit columns. 
+ // In SSB test, it make no difference; So need more scenarios to test + _evaluate_short_circuit_predicate(sel_rowid_idx, &selected_size); + + // step3: read non_predicate column + if (_non_predicate_columns.size() != 0) { + _read_columns_by_rowids(_non_predicate_columns, _block_rowids, sel_rowid_idx, selected_size, &_current_return_columns); + } + + // step4: output columns + // 4.1 output non-predicate column + _output_non_pred_columns(block, is_mem_reuse); + + // 4.2 output short circuit predicate column + _output_column_by_sel_idx(block, _short_cir_pred_column_ids, sel_rowid_idx, selected_size, is_mem_reuse); + // 4.3 output vectorizatioin predicate column + _output_column_by_sel_idx(block, _vec_pred_column_ids, sel_rowid_idx, selected_size, is_mem_reuse); + + } + + return Status::OK(); } } // namespace segment_v2 diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.h b/be/src/olap/rowset/segment_v2/segment_iterator.h index fabefd5..c06077e 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.h +++ b/be/src/olap/rowset/segment_v2/segment_iterator.h @@ -60,7 +60,7 @@ public: uint64_t data_id() const { return _segment->id(); } private: - Status _init(); + Status _init(bool is_vec = false); Status _init_return_column_iterators(); Status _init_bitmap_index_iterators(); @@ -78,6 +78,7 @@ private: Status _apply_bitmap_index(); void _init_lazy_materialization(); + void _vec_init_lazy_materialization(); uint32_t segment_id() const { return _segment->id(); } uint32_t num_rows() const { return _segment->num_rows(); } @@ -87,6 +88,17 @@ private: Status _read_columns(const std::vector<ColumnId>& column_ids, RowBlockV2* block, size_t row_offset, size_t nrows); + // for vectorization implementation + Status _read_columns(const std::vector<ColumnId>& column_ids, vectorized::MutableColumns& column_block, size_t nrows); + Status _read_columns_by_index(uint32_t nrows_read_limit, uint32_t& nrows_read, bool set_block_rowid); + void 
_init_current_block(vectorized::Block* block, std::vector<vectorized::MutableColumnPtr>& non_pred_vector); + void _evaluate_vectorization_predicate(uint16_t* sel_rowid_idx, uint16_t& selected_size); + void _evaluate_short_circuit_predicate(uint16_t* sel_rowid_idx, uint16_t* selected_size); + void _output_non_pred_columns(vectorized::Block* block, bool is_block_mem_reuse); + void _output_column_by_sel_idx(vectorized::Block* block, std::vector<ColumnId> columnids, uint16_t* sel_rowid_idx, uint16_t select_size, bool is_block_mem_reuse); + void _read_columns_by_rowids(std::vector<ColumnId>& read_column_ids, std::vector<rowid_t>& rowid_vector, + uint16_t* sel_rowid_idx, size_t select_size, vectorized::MutableColumns* mutable_columns); + private: class BitmapRangeIterator; @@ -116,6 +128,21 @@ private: // could be a local variable of next_batch(), kept here to reuse vector memory std::vector<rowid_t> _block_rowids; + // fields for vectorization execution + bool _is_all_column_basic_type; + std::vector<ColumnId> _vec_pred_column_ids; // keep columnId of columns for vectorized predicate evaluation + std::vector<ColumnId> _short_cir_pred_column_ids; // keep columnId of columns for short circuit predicate evaluation + vector<bool> _is_pred_column; // columns hold by segmentIter + vectorized::MutableColumns _current_return_columns; + AndBlockColumnPredicate* _pre_eval_block_predicate = nullptr; + std::vector<ColumnPredicate*> _short_cir_eval_predicate; + // when lazy materialization is enable, segmentIter need to read data at least twice + // first, read predicate columns by various index + // second, read non-predicate columns + // so we need a field to stand for columns first time to read + vector<ColumnId> _first_read_column_ids; + vector<int> _schema_block_id_map; // map from schema column id to column idx in Block + // the actual init process is delayed to the first call to next_batch() bool _inited; diff --git a/be/src/olap/schema.cpp b/be/src/olap/schema.cpp index 
d588b58..f5a0900 100644
--- a/be/src/olap/schema.cpp
+++ b/be/src/olap/schema.cpp
@@ -18,6 +18,11 @@
 #include "olap/schema.h"

 #include "olap/row_block2.h"
+#include "vec/columns/column_nullable.h"
+#include "vec/columns/column_complex.h"
+#include "vec/columns/predicate_column.h"
+#include "vec/core/types.h"
+#include "olap/uint24.h"

 namespace doris {

@@ -105,6 +110,9 @@ Schema::~Schema() {

 vectorized::DataTypePtr Schema::get_data_type_ptr(FieldType type) {
     switch (type) {
+    case OLAP_FIELD_TYPE_BOOL:
+        return std::make_shared<vectorized::DataTypeUInt8>();
+
     case OLAP_FIELD_TYPE_TINYINT:
         return std::make_shared<vectorized::DataTypeInt8>();

@@ -135,7 +143,11 @@ vectorized::DataTypePtr Schema::get_data_type_ptr(FieldType type) {
     case OLAP_FIELD_TYPE_CHAR:
     case OLAP_FIELD_TYPE_VARCHAR:
     case OLAP_FIELD_TYPE_HLL:
+    case OLAP_FIELD_TYPE_STRING:
         return std::make_shared<vectorized::DataTypeString>();
+
+    case OLAP_FIELD_TYPE_OBJECT: // bitmap (OBJECT) columns; the label must be a FieldType enumerator like its siblings
+        return std::make_shared<vectorized::DataTypeBitMap>();

     case OLAP_FIELD_TYPE_DECIMAL:
         return std::make_shared<vectorized::DataTypeDecimal<vectorized::Decimal128>>(27, 9);
@@ -147,4 +159,58 @@ vectorized::DataTypePtr Schema::get_data_type_ptr(FieldType type) {
     return nullptr;
 }

+vectorized::IColumn::MutablePtr Schema::get_predicate_column_nullable_ptr(FieldType type, bool is_null) {
+    vectorized::IColumn::MutablePtr ptr = Schema::get_predicate_column_ptr(type); // plain predicate column for this field type
+    if (is_null) { // nullable field: wrap the column together with a new, empty UInt8 null map
+        return doris::vectorized::ColumnNullable::create(std::move(ptr), doris::vectorized::ColumnUInt8::create());
+    }
+    return ptr;
+}
+
+vectorized::IColumn::MutablePtr Schema::get_predicate_column_ptr(FieldType type) {
+    switch (type) { // one PredicateColumnType instantiation per supported field type
+    case OLAP_FIELD_TYPE_BOOL:
+        return doris::vectorized::PredicateColumnType<bool>::create();
+    case OLAP_FIELD_TYPE_TINYINT:
+        return doris::vectorized::PredicateColumnType<doris::vectorized::Int8>::create();
+
+    case OLAP_FIELD_TYPE_SMALLINT:
+        return doris::vectorized::PredicateColumnType<doris::vectorized::Int16>::create();
+
+    case OLAP_FIELD_TYPE_INT:
+        return doris::vectorized::PredicateColumnType<doris::vectorized::Int32>::create();
+
+    case OLAP_FIELD_TYPE_FLOAT:
+        return doris::vectorized::PredicateColumnType<doris::vectorized::Float32>::create();
+
+    case OLAP_FIELD_TYPE_DOUBLE:
+        return doris::vectorized::PredicateColumnType<doris::vectorized::Float64>::create();
+
+    case OLAP_FIELD_TYPE_BIGINT:
+        return doris::vectorized::PredicateColumnType<doris::vectorized::Int64>::create();
+
+    case OLAP_FIELD_TYPE_LARGEINT:
+        return doris::vectorized::PredicateColumnType<doris::vectorized::Int128>::create();
+
+    case OLAP_FIELD_TYPE_DATE:
+        return doris::vectorized::PredicateColumnType<uint24_t>::create();
+
+    case OLAP_FIELD_TYPE_DATETIME:
+        return doris::vectorized::PredicateColumnType<uint64_t>::create();
+
+    case OLAP_FIELD_TYPE_CHAR:
+    case OLAP_FIELD_TYPE_VARCHAR:
+    case OLAP_FIELD_TYPE_STRING:
+        return doris::vectorized::PredicateColumnType<StringValue>::create();
+
+    case OLAP_FIELD_TYPE_DECIMAL:
+        return doris::vectorized::PredicateColumnType<decimal12_t>::create();
+
+    default: // any other field type: DCHECK(false), then fall out of the switch
+        DCHECK(false);
+    }
+    // Only reached for an unhandled type; the explicit return keeps compilers from complaining
+    return nullptr;
+}
+
 } // namespace doris
diff --git a/be/src/olap/schema.h b/be/src/olap/schema.h
index 3fd90bf..2596f97 100644
--- a/be/src/olap/schema.h
+++ b/be/src/olap/schema.h
@@ -102,6 +102,10 @@ public:

     static vectorized::DataTypePtr get_data_type_ptr(FieldType type);

+    static vectorized::IColumn::MutablePtr get_predicate_column_ptr(FieldType type);
+
+    static vectorized::IColumn::MutablePtr get_predicate_column_nullable_ptr(FieldType type, bool is_null = false);
+
     const std::vector<Field*>& columns() const { return _cols; }

     const Field* column(ColumnId cid) const { return _cols[cid]; }
diff --git a/be/src/vec/columns/column.h b/be/src/vec/columns/column.h
index 5e41028..a869a65 100644
--- a/be/src/vec/columns/column.h
+++ b/be/src/vec/columns/column.h
@@ -215,7 +215,7 @@ public:
      * used by lazy materialization to filter column by selected rowids
      */
     virtual Ptr filter_by_selector(const uint16_t* sel, size_t sel_size, Ptr* ptr = nullptr) {
-        return nullptr;
+        LOG(FATAL) << "column not support filter_by_selector"; // NOTE(review): no return statement follows; relies on LOG(FATAL) never returning
     };

     /// Permutes elements using specified permutation. Is used in sortings.
@@ -333,6 +333,8 @@ public:
     /// True if column contains something nullable inside. It's true for ColumnNullable, can be true or false for ColumnConst, etc.
     virtual bool is_nullable() const { return false; }

+    virtual bool is_bitmap() const { return false; }
+
     // true iff column has null element
     virtual bool has_null() const { return false; }

@@ -414,10 +416,15 @@ public:
     // only used in ColumnNullable replace_column_data
     virtual void replace_column_data_default(size_t self_row = 0) = 0;

-    bool is_date_type() { return is_date; }
+    virtual bool is_date_type() { return is_date; }
+    virtual bool is_datetime_type() { return is_date_time; }
+
+    virtual void set_date_type() { is_date = true; }
+    virtual void set_datetime_type() { is_date_time = true; }

-    // todo(wb): a temporary implemention, need refactor here
+    // todo(wb): a temporary implementation, need re-abstract here
     bool is_date = false;
+    bool is_date_time = false;

 protected:
     /// Template is to devirtualize calls to insert_from method.
diff --git a/be/src/vec/columns/column_complex.h b/be/src/vec/columns/column_complex.h
index 63a120a..296f94b 100644
--- a/be/src/vec/columns/column_complex.h
+++ b/be/src/vec/columns/column_complex.h
@@ -45,6 +45,8 @@ public:

     bool is_numeric() const override { return false; }

+    bool is_bitmap() const override { return std::is_same_v<T, BitmapValue>; } // true only for the BitmapValue instantiation
+
     size_t size() const override { return data.size(); }

     StringRef get_data_at(size_t n) const override {
diff --git a/be/src/vec/columns/column_nullable.cpp b/be/src/vec/columns/column_nullable.cpp
index bee6909..bf4bb44 100644
--- a/be/src/vec/columns/column_nullable.cpp
+++ b/be/src/vec/columns/column_nullable.cpp
@@ -177,9 +177,18 @@ ColumnPtr ColumnNullable::filter(const Filter& filt, ssize_t result_size_hint) c
 }

 ColumnPtr ColumnNullable::filter_by_selector(const uint16_t* sel, size_t sel_size, ColumnPtr* ptr) {
-    ColumnPtr filtered_data = get_nested_column().filter_by_selector(sel, sel_size, ptr);
-    ColumnPtr filtered_null_map = get_null_map_column().filter_by_selector(sel, sel_size, ptr);
-    return ColumnNullable::create(filtered_data, filtered_null_map);
+    if (ptr != nullptr) { // destination supplied: it is treated as a ColumnNullable and filled part by part
+        const ColumnNullable* nullable_col_ptr = static_cast<const ColumnNullable*>((*ptr).get()); // base-to-derived downcast; assumes *ptr really is a ColumnNullable
+        ColumnPtr nest_col_ptr = nullable_col_ptr->nested_column;
+        ColumnPtr null_map_ptr = nullable_col_ptr->null_map;
+        get_nested_column().filter_by_selector(sel, sel_size, &nest_col_ptr); // return value unused; the destination's nested column is passed as the target
+        get_null_map_column().filter_by_selector(sel, sel_size, &null_map_ptr); // same for the null map
+        return *ptr;
+    } else { // no destination: filter both parts into new columns and wrap them
+        ColumnPtr filtered_data = get_nested_column().filter_by_selector(sel, sel_size);
+        ColumnPtr filtered_null_map = get_null_map_column().filter_by_selector(sel, sel_size);
+        return ColumnNullable::create(filtered_data, filtered_null_map);
+    }
 }

 ColumnPtr ColumnNullable::permute(const Permutation& perm, size_t limit) const {
diff --git a/be/src/vec/columns/column_nullable.h b/be/src/vec/columns/column_nullable.h
index 8f47777..fb6aa8f 100644
--- 
a/be/src/vec/columns/column_nullable.h
+++ b/be/src/vec/columns/column_nullable.h
@@ -130,7 +130,13 @@ public:
         return false;
     }

+    bool is_date_type() override { return get_nested_column().is_date_type(); } // forwards to the nested column
+    bool is_datetime_type() override { return get_nested_column().is_datetime_type(); } // forwards to the nested column
+    void set_date_type() override { get_nested_column().set_date_type(); } // sets the flag on the nested column, not on the wrapper
+    void set_datetime_type() override { get_nested_column().set_datetime_type(); } // sets the flag on the nested column, not on the wrapper
+
     bool is_nullable() const override { return true; }
+    bool is_bitmap() const override { return get_nested_column().is_bitmap(); } // forwards to the nested column
     bool is_column_decimal() const override { return get_nested_column().is_column_decimal(); }
     bool is_column_string() const override { return get_nested_column().is_column_string(); }
     bool is_fixed_and_contiguous() const override { return false; }
diff --git a/be/src/vec/columns/column_vector.h b/be/src/vec/columns/column_vector.h
index 5323f60..6626229 100644
--- a/be/src/vec/columns/column_vector.h
+++ b/be/src/vec/columns/column_vector.h
@@ -119,6 +119,13 @@ private:
     /// Sugar constructor.
ColumnVector(std::initializer_list<T> il) : data {il} {}

+    void insert_res_column(const uint16_t* sel, size_t sel_size, vectorized::ColumnVector<T>* res_ptr) {
+        for (size_t i = 0; i < sel_size; i++) { // append data[sel[i]] to res_ptr for every selected index
+            T* val_ptr = &data[sel[i]];
+            res_ptr->insert_data(reinterpret_cast<char*>(val_ptr), 0); // value passed as raw bytes; the length argument is 0
+        }
+    }
+
 public:
     bool is_numeric() const override { return IsNumber<T>; }

@@ -195,6 +202,25 @@ public:

     ColumnPtr filter(const IColumn::Filter& filt, ssize_t result_size_hint) const override;

+    // note(wb) this method is only used in storage layer now
+    ColumnPtr filter_by_selector(const uint16_t* sel, size_t sel_size, ColumnPtr* ptr = nullptr) override {
+        if (ptr == nullptr) { // no destination: collect the selected rows into a new column
+            auto res_ptr = vectorized::ColumnVector<T>::create();
+            if (sel_size == 0) {
+                return res_ptr;
+            }
+            insert_res_column(sel, sel_size, res_ptr.get());
+            return res_ptr;
+        } else { // destination supplied: append the selected rows to *ptr
+            auto res_ptr = (*std::move(*ptr)).assume_mutable();
+            if (sel_size == 0) {
+                return res_ptr;
+            }
+            insert_res_column(sel, sel_size, static_cast<vectorized::ColumnVector<T>*>(res_ptr.get())); // assumes *ptr is a ColumnVector<T>
+            return *ptr;
+        }
+    }
+
     ColumnPtr permute(const IColumn::Permutation& perm, size_t limit) const override;

     // ColumnPtr index(const IColumn & indexes, size_t limit) const override;
diff --git a/be/src/vec/columns/predicate_column.h b/be/src/vec/columns/predicate_column.h
index c8f8627..8095cff 100644
--- a/be/src/vec/columns/predicate_column.h
+++ b/be/src/vec/columns/predicate_column.h
@@ -97,6 +97,14 @@ private:
         }
     }

+    void insert_byte_to_res_column(const uint16_t* sel, size_t sel_size, vectorized::IColumn* res_ptr) {
+        for (size_t i = 0; i < sel_size; i++) { // append data[sel[i]] to res_ptr for every selected index
+            uint16_t n = sel[i];
+            char* ch_val = reinterpret_cast<char*>(&data[n]);
+            res_ptr->insert_data(ch_val, 0);
+        }
+    }
+
     template <typename Y>
     ColumnPtr filter_default_type_by_selector(const uint16_t* sel, size_t sel_size, ColumnPtr* ptr = nullptr) {
         static_assert(std::is_same_v<T, Y>);
@@ -371,6 +379,23 @@ public:
         return *ptr;
     }

+    ColumnPtr filter_bool_by_selector(const uint16_t* sel, size_t sel_size, ColumnPtr* ptr = nullptr) {
+        if (ptr == nullptr) {
+            // no destination column: collect the selected rows into a new UInt8 column
+            auto res = vectorized::ColumnVector<vectorized::UInt8>::create();
+            if (sel_size != 0) {
+                res->reserve(sel_size);
+                insert_byte_to_res_column(sel, sel_size, res.get());
+            }
+            return res; // must not reach `return *ptr` below: ptr is null on this path
+        }
+        if (sel_size != 0) { // destination supplied by the caller: append the selected rows to it
+            MutableColumnPtr ptr_res = (*std::move(*ptr)).assume_mutable();
+            insert_byte_to_res_column(sel, sel_size, ptr_res.get());
+        }
+        return *ptr;
+    }
+
     //todo(wb) need refactor this method, using return status to check unexpect args instead of LOG(FATAL)
     ColumnPtr filter_by_selector(const uint16_t* sel, size_t sel_size, ColumnPtr* ptr = nullptr) override {
         if constexpr (std::is_same_v<T, StringValue>) {
@@ -396,7 +421,7 @@ public:
         } else if constexpr (std::is_same_v<T, doris::vectorized::Int128>) {
             return filter_default_type_by_selector<doris::vectorized::Int128>(sel, sel_size, ptr);
         } else if (std::is_same_v<T, bool>) {
-            LOG(FATAL) << "bool will be support later";
+            return filter_bool_by_selector(sel, sel_size, ptr);
         } else {
             LOG(FATAL) << "unexpected type in predicate column";
         }
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org