This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
The following commit(s) were added to refs/heads/master by this push: new d711d64 [fix](vectorization)Some small fix for SegmentIter Vectorization (#8267) d711d64 is described below commit d711d64dda3b92f3b51154ee585bf0c92b9aed0d Author: wangbo <wan...@apache.org> AuthorDate: Tue Mar 8 13:13:17 2022 +0800 [fix](vectorization)Some small fix for SegmentIter Vectorization (#8267) 1. Stop using short-circuit evaluation for the date type: reading a date column is cheap, so lazy materialization costs more than it saves. 2. Fix incorrect results when reading hll/bitmap/date types. --- be/src/olap/rowset/segment_v2/segment_iterator.cpp | 5 +---- be/src/vec/columns/column_complex.h | 16 ++++++++++++++++ be/src/vec/columns/column_vector.h | 4 ++-- 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index 2883e63..6a8febb 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -613,10 +613,7 @@ void SegmentIterator::_vec_init_lazy_materialization() { _is_pred_column[cid] = true; pred_column_ids.insert(cid); - // for date type which can not be executed in a vectorized way, using short circuit execution - if (type == OLAP_FIELD_TYPE_VARCHAR || type == OLAP_FIELD_TYPE_CHAR || - type == OLAP_FIELD_TYPE_DECIMAL || type == OLAP_FIELD_TYPE_DATE || - predicate->is_in_predicate()) { + if (type == OLAP_FIELD_TYPE_VARCHAR || type == OLAP_FIELD_TYPE_CHAR || type == OLAP_FIELD_TYPE_STRING || predicate->is_in_predicate()) { short_cir_pred_col_id_set.insert(cid); _short_cir_eval_predicate.push_back(predicate); _is_all_column_basic_type = false; diff --git a/be/src/vec/columns/column_complex.h b/be/src/vec/columns/column_complex.h index cd26c7f..df2b582 100644 --- a/be/src/vec/columns/column_complex.h +++ b/be/src/vec/columns/column_complex.h @@ -63,6 +63,7 @@ public: } void insert_many_binary_data(char* data_array, 
uint32_t* len_array, uint32_t* start_offset_array, size_t num) override { + resize(num); if constexpr (std::is_same_v<T, BitmapValue>) { for (size_t i = 0; i < num; i++) { uint32_t len = len_array[i]; @@ -76,6 +77,21 @@ public: *pvalue = std::move(*reinterpret_cast<BitmapValue*>(data_array + start_offset)); } } + } else if constexpr (std::is_same_v<T, HyperLogLog>) { + for (size_t i = 0; i < num; i++) { + uint32_t len = len_array[i]; + uint32_t start_offset = start_offset_array[i]; + HyperLogLog* pvalue = &get_element(size() - 1); + if (len != 0) { + HyperLogLog value; + value.deserialize(Slice(data_array + start_offset, len)); + *pvalue = std::move(value); + } else { + *pvalue = std::move(*reinterpret_cast<HyperLogLog*>(data_array + start_offset)); + } + } + } else { + LOG(FATAL) << "Unexpected type in column complex"; } } diff --git a/be/src/vec/columns/column_vector.h b/be/src/vec/columns/column_vector.h index c6b2311..216e8fd 100644 --- a/be/src/vec/columns/column_vector.h +++ b/be/src/vec/columns/column_vector.h @@ -179,7 +179,7 @@ public: value |= *(unsigned char*)(cur_ptr); vectorized::VecDateTimeValue date; date.from_olap_date(value); - data.push_back_without_reserve(date); + this->insert_data(reinterpret_cast<char*>(&date), 0); } } @@ -189,7 +189,7 @@ public: const char* cur_ptr = data_ptr + value_size * i; uint64_t value = *reinterpret_cast<const uint64_t*>(cur_ptr); vectorized::VecDateTimeValue date(value); - data.push_back_without_reserve(date); + this->insert_data(reinterpret_cast<char*>(&date), 0); } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org