This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
The following commit(s) were added to refs/heads/master by this push: new 52d18aa83c permute impl for column array; and codes format (#8949) 52d18aa83c is described below commit 52d18aa83c1180f419b5fba6ab908f320b262346 Author: camby <104178...@qq.com> AuthorDate: Wed Apr 13 09:47:54 2022 +0800 permute impl for column array; and codes format (#8949) Co-authored-by: cambyzju <zhuxiaol...@baidu.com> --- be/src/vec/columns/column_array.cpp | 405 ++++++++++++++++++--------------- be/src/vec/columns/column_array.h | 111 ++++----- be/test/vec/core/column_array_test.cpp | 101 ++++++++ 3 files changed, 386 insertions(+), 231 deletions(-) diff --git a/be/src/vec/columns/column_array.cpp b/be/src/vec/columns/column_array.cpp index 4754ca3b70..cc4f380f7e 100644 --- a/be/src/vec/columns/column_array.cpp +++ b/be/src/vec/columns/column_array.cpp @@ -18,27 +18,28 @@ // https://github.com/ClickHouse/ClickHouse/blob/master/src/Columns/ColumnArray.cpp // and modified by Doris +#include "vec/columns/column_array.h" + #include <string.h> // memcpy -#include "vec/common/assert_cast.h" #include "vec/columns/collator.h" -#include "vec/columns/column_array.h" #include "vec/columns/column_const.h" #include "vec/columns/column_nullable.h" #include "vec/columns/column_string.h" #include "vec/columns/columns_common.h" #include "vec/columns/columns_number.h" +#include "vec/common/assert_cast.h" namespace doris::vectorized { namespace ErrorCodes { - extern const int NOT_IMPLEMENTED; - extern const int BAD_ARGUMENTS; - extern const int PARAMETER_OUT_OF_BOUND; - extern const int SIZES_OF_COLUMNS_DOESNT_MATCH; - extern const int LOGICAL_ERROR; - extern const int TOO_LARGE_ARRAY_SIZE; -} +extern const int NOT_IMPLEMENTED; +extern const int BAD_ARGUMENTS; +extern const int PARAMETER_OUT_OF_BOUND; +extern const int SIZES_OF_COLUMNS_DOESNT_MATCH; +extern const int LOGICAL_ERROR; +extern const int TOO_LARGE_ARRAY_SIZE; +} // namespace ErrorCodes /** Obtaining array as Field can be slow for large arrays and consume vast amount of memory. * Just don't allow to do it. @@ -48,9 +49,9 @@ namespace ErrorCodes { */ static constexpr size_t max_array_size_as_field = 1000000; -ColumnArray::ColumnArray(MutableColumnPtr && nested_column, MutableColumnPtr && offsets_column) - : data(std::move(nested_column)), offsets(std::move(offsets_column)) { - const ColumnOffsets * offsets_concrete = typeid_cast<const ColumnOffsets *>(offsets.get()); +ColumnArray::ColumnArray(MutableColumnPtr&& nested_column, MutableColumnPtr&& offsets_column) + : data(std::move(nested_column)), offsets(std::move(offsets_column)) { + const ColumnOffsets* offsets_concrete = typeid_cast<const ColumnOffsets*>(offsets.get()); if (!offsets_concrete) { LOG(FATAL) << "offsets_column must be a ColumnUInt64"; @@ -71,8 +72,7 @@ ColumnArray::ColumnArray(MutableColumnPtr && nested_column, MutableColumnPtr && */ } -ColumnArray::ColumnArray(MutableColumnPtr && nested_column) - : data(std::move(nested_column)) { +ColumnArray::ColumnArray(MutableColumnPtr&& nested_column) : data(std::move(nested_column)) { if (!data->empty()) { LOG(FATAL) << "Not empty data passed to ColumnArray, but no offsets passed"; } @@ -80,13 +80,14 @@ ColumnArray::ColumnArray(MutableColumnPtr && nested_column) offsets = ColumnOffsets::create(); } -std::string ColumnArray::get_name() const { return "Array(" + get_data().get_name() + ")"; } +std::string ColumnArray::get_name() const { + return "Array(" + get_data().get_name() + ")"; +} MutableColumnPtr ColumnArray::clone_resized(size_t to_size) const { auto res = ColumnArray::create(get_data().clone_empty()); - if (to_size == 0) - return res; + if (to_size == 0) return res; size_t from_size = size(); if (to_size <= from_size) { @@ -103,8 +104,7 @@ MutableColumnPtr ColumnArray::clone_resized(size_t to_size) const { } res->get_offsets().resize(to_size); - for (size_t i = from_size; i < to_size; ++i) - res->get_offsets()[i] = offset; + for (size_t i = from_size; i < to_size; ++i) res->get_offsets()[i] = offset; } return res; @@ -124,13 +124,12 @@ Field ColumnArray::operator[](size_t n) const { Array res(size); - for (size_t i = 0; i < size; ++i) - res[i] = get_data()[offset + i]; + for (size_t i = 0; i < size; ++i) res[i] = get_data()[offset + i]; return res; } -void ColumnArray::get(size_t n, Field & res) const { +void ColumnArray::get(size_t n, Field& res) const { size_t offset = offset_at(n); size_t size = size_at(n); @@ -139,10 +138,9 @@ void ColumnArray::get(size_t n, Field & res) const { << " maximum size " << max_array_size_as_field; res = Array(size); - Array & res_arr = doris::vectorized::get<Array &>(res); + Array& res_arr = doris::vectorized::get<Array&>(res); - for (size_t i = 0; i < size; ++i) - get_data().get(offset + i, res_arr[i]); + for (size_t i = 0; i < size; ++i) get_data().get(offset + i, res_arr[i]); } StringRef ColumnArray::get_data_at(size_t n) const { @@ -156,8 +154,7 @@ StringRef ColumnArray::get_data_at(size_t n) const { StringRef first = get_data().get_data_at_with_terminating_zero(offset_of_first_elem); size_t array_size = size_at(n); - if (array_size == 0) - return StringRef(first.data, 0); + if (array_size == 0) return StringRef(first.data, 0); size_t offset_of_last_elem = get_offsets()[n] - 1; StringRef last = get_data().get_data_at_with_terminating_zero(offset_of_last_elem); @@ -166,11 +163,11 @@ StringRef ColumnArray::get_data_at(size_t n) const { } bool ColumnArray::is_default_at(size_t n) const { - const auto & offsets_data = get_offsets(); + const auto& offsets_data = get_offsets(); return offsets_data[n] == offsets_data[static_cast<ssize_t>(n) - 1]; } -void ColumnArray::insert_data(const char * pos, size_t length) { +void ColumnArray::insert_data(const char* pos, size_t length) { /** Similarly - only for arrays of fixed length values. */ if (!data->is_fixed_and_contiguous()) @@ -180,9 +177,8 @@ void ColumnArray::insert_data(const char * pos, size_t length) { size_t elems = 0; - if (length) - { - const char * end = pos + length; + if (length) { + const char* end = pos + length; for (; pos + field_size <= end; pos += field_size, ++elems) data->insert_data(pos, field_size); @@ -193,11 +189,12 @@ void ColumnArray::insert_data(const char * pos, size_t length) { get_offsets().push_back(get_offsets().back() + elems); } -StringRef ColumnArray::serialize_value_into_arena(size_t n, Arena & arena, char const *& begin) const { +StringRef ColumnArray::serialize_value_into_arena(size_t n, Arena& arena, + char const*& begin) const { size_t array_size = size_at(n); size_t offset = offset_at(n); - char * pos = arena.alloc_continue(sizeof(array_size), begin); + char* pos = arena.alloc_continue(sizeof(array_size), begin); memcpy(pos, &array_size, sizeof(array_size)); StringRef res(pos, sizeof(array_size)); @@ -211,36 +208,33 @@ StringRef ColumnArray::serialize_value_into_arena(size_t n, Arena & arena, char return res; } -const char * ColumnArray::deserialize_and_insert_from_arena(const char * pos) { +const char* ColumnArray::deserialize_and_insert_from_arena(const char* pos) { size_t array_size = unaligned_load<size_t>(pos); pos += sizeof(array_size); - for (size_t i = 0; i < array_size; ++i) - pos = get_data().deserialize_and_insert_from_arena(pos); + for (size_t i = 0; i < array_size; ++i) pos = get_data().deserialize_and_insert_from_arena(pos); get_offsets().push_back(get_offsets().back() + array_size); return pos; } -void ColumnArray::update_hash_with_value(size_t n, SipHash & hash) const { +void ColumnArray::update_hash_with_value(size_t n, SipHash& hash) const { size_t array_size = size_at(n); size_t offset = offset_at(n); hash.update(array_size); - for (size_t i = 0; i < array_size; ++i) - get_data().update_hash_with_value(offset + i, hash); + for (size_t i = 0; i < array_size; ++i) get_data().update_hash_with_value(offset + i, hash); } -void ColumnArray::insert(const Field & x) { - const Array & array = doris::vectorized::get<const Array &>(x); +void ColumnArray::insert(const Field& x) { + const Array& array = doris::vectorized::get<const Array&>(x); size_t size = array.size(); - for (size_t i = 0; i < size; ++i) - get_data().insert(array[i]); + for (size_t i = 0; i < size; ++i) get_data().insert(array[i]); get_offsets().push_back(get_offsets().back() + size); } -void ColumnArray::insert_from(const IColumn & src_, size_t n) { - const ColumnArray & src = assert_cast<const ColumnArray &>(src_); +void ColumnArray::insert_from(const IColumn& src_, size_t n) { + const ColumnArray& src = assert_cast<const ColumnArray&>(src_); size_t size = src.size_at(n); size_t offset = src.offset_at(n); @@ -256,17 +250,17 @@ void ColumnArray::insert_default() { } void ColumnArray::pop_back(size_t n) { - auto & offsets_data = get_offsets(); + auto& offsets_data = get_offsets(); DCHECK(n <= offsets_data.size()); size_t nested_n = offsets_data.back() - offset_at(offsets_data.size() - n); - if (nested_n) - get_data().pop_back(nested_n); + if (nested_n) get_data().pop_back(nested_n); offsets_data.resize_assume_reserved(offsets_data.size() - n); } void ColumnArray::reserve(size_t n) { get_offsets().reserve(n); - get_data().reserve(n); /// The average size of arrays is not taken into account here. Or it is considered to be no more than 1. + get_data().reserve( + n); /// The average size of arrays is not taken into account here. Or it is considered to be no more than 1. } size_t ColumnArray::byte_size() const { @@ -288,24 +282,24 @@ ColumnPtr ColumnArray::convert_to_full_column_if_const() const { return ColumnArray::create(data->convert_to_full_column_if_const(), offsets); } -void ColumnArray::insert_range_from(const IColumn & src, size_t start, size_t length) { - if (length == 0) - return; +void ColumnArray::insert_range_from(const IColumn& src, size_t start, size_t length) { + if (length == 0) return; - const ColumnArray & src_concrete = assert_cast<const ColumnArray &>(src); + const ColumnArray& src_concrete = assert_cast<const ColumnArray&>(src); if (start + length > src_concrete.get_offsets().size()) LOG(FATAL) << "Parameter out of bound in ColumnArray::insert_range_from method. [start(" << std::to_string(start) << ") + length(" << std::to_string(length) - << ") > offsets.size(" << std::to_string(src_concrete.get_offsets().size()) << ")]"; + << ") > offsets.size(" << std::to_string(src_concrete.get_offsets().size()) + << ")]"; size_t nested_offset = src_concrete.offset_at(start); size_t nested_length = src_concrete.get_offsets()[start + length - 1] - nested_offset; get_data().insert_range_from(src_concrete.get_data(), nested_offset, nested_length); - Offsets & cur_offsets = get_offsets(); - const Offsets & src_offsets = src_concrete.get_offsets(); + Offsets& cur_offsets = get_offsets(); + const Offsets& src_offsets = src_concrete.get_offsets(); if (start == 0 && cur_offsets.empty()) { cur_offsets.assign(src_offsets.begin(), src_offsets.begin() + length); @@ -320,55 +314,64 @@ void ColumnArray::insert_range_from(const IColumn & src, size_t start, size_t le } } -ColumnPtr ColumnArray::filter(const Filter & filt, ssize_t result_size_hint) const { - if (typeid_cast<const ColumnUInt8 *>(data.get())) return filter_number<UInt8>(filt, result_size_hint); - if (typeid_cast<const ColumnUInt16 *>(data.get())) return filter_number<UInt16>(filt, result_size_hint); - if (typeid_cast<const ColumnUInt32 *>(data.get())) return filter_number<UInt32>(filt, result_size_hint); - if (typeid_cast<const ColumnUInt64 *>(data.get())) return filter_number<UInt64>(filt, result_size_hint); - if (typeid_cast<const ColumnInt8 *>(data.get())) return filter_number<Int8>(filt, result_size_hint); - if (typeid_cast<const ColumnInt16 *>(data.get())) return filter_number<Int16>(filt, result_size_hint); - if (typeid_cast<const ColumnInt32 *>(data.get())) return filter_number<Int32>(filt, result_size_hint); - if (typeid_cast<const ColumnInt64 *>(data.get())) return filter_number<Int64>(filt, result_size_hint); - if (typeid_cast<const ColumnFloat32 *>(data.get())) return filter_number<Float32>(filt, result_size_hint); - if (typeid_cast<const ColumnFloat64 *>(data.get())) return filter_number<Float64>(filt, result_size_hint); - if (typeid_cast<const ColumnString *>(data.get())) return filter_string(filt, result_size_hint); +ColumnPtr ColumnArray::filter(const Filter& filt, ssize_t result_size_hint) const { + if (typeid_cast<const ColumnUInt8*>(data.get())) + return filter_number<UInt8>(filt, result_size_hint); + if (typeid_cast<const ColumnUInt16*>(data.get())) + return filter_number<UInt16>(filt, result_size_hint); + if (typeid_cast<const ColumnUInt32*>(data.get())) + return filter_number<UInt32>(filt, result_size_hint); + if (typeid_cast<const ColumnUInt64*>(data.get())) + return filter_number<UInt64>(filt, result_size_hint); + if (typeid_cast<const ColumnInt8*>(data.get())) + return filter_number<Int8>(filt, result_size_hint); + if (typeid_cast<const ColumnInt16*>(data.get())) + return filter_number<Int16>(filt, result_size_hint); + if (typeid_cast<const ColumnInt32*>(data.get())) + return filter_number<Int32>(filt, result_size_hint); + if (typeid_cast<const ColumnInt64*>(data.get())) + return filter_number<Int64>(filt, result_size_hint); + if (typeid_cast<const ColumnFloat32*>(data.get())) + return filter_number<Float32>(filt, result_size_hint); + if (typeid_cast<const ColumnFloat64*>(data.get())) + return filter_number<Float64>(filt, result_size_hint); + if (typeid_cast<const ColumnString*>(data.get())) return filter_string(filt, result_size_hint); //if (typeid_cast<const ColumnTuple *>(data.get())) return filterTuple(filt, result_size_hint); - if (typeid_cast<const ColumnNullable *>(data.get())) return filter_nullable(filt, result_size_hint); + if (typeid_cast<const ColumnNullable*>(data.get())) + return filter_nullable(filt, result_size_hint); return filter_generic(filt, result_size_hint); } template <typename T> -ColumnPtr ColumnArray::filter_number(const Filter & filt, ssize_t result_size_hint) const { - if (get_offsets().empty()) - return ColumnArray::create(data); +ColumnPtr ColumnArray::filter_number(const Filter& filt, ssize_t result_size_hint) const { + if (get_offsets().empty()) return ColumnArray::create(data); auto res = ColumnArray::create(data->clone_empty()); - auto & res_elems = assert_cast<ColumnVector<T> &>(res->get_data()).get_data(); - Offsets & res_offsets = res->get_offsets(); + auto& res_elems = assert_cast<ColumnVector<T>&>(res->get_data()).get_data(); + Offsets& res_offsets = res->get_offsets(); - filter_arrays_impl<T>(assert_cast<const ColumnVector<T> &>(*data).get_data(), get_offsets(), res_elems, res_offsets, filt, result_size_hint); + filter_arrays_impl<T>(assert_cast<const ColumnVector<T>&>(*data).get_data(), get_offsets(), + res_elems, res_offsets, filt, result_size_hint); return res; } -ColumnPtr ColumnArray::filter_string(const Filter & filt, ssize_t result_size_hint) const { +ColumnPtr ColumnArray::filter_string(const Filter& filt, ssize_t result_size_hint) const { size_t col_size = get_offsets().size(); - if (col_size != filt.size()) - LOG(FATAL) << "Size of filter doesn't match size of column."; + if (col_size != filt.size()) LOG(FATAL) << "Size of filter doesn't match size of column."; - if (0 == col_size) - return ColumnArray::create(data); + if (0 == col_size) return ColumnArray::create(data); auto res = ColumnArray::create(data->clone_empty()); - const ColumnString & src_string = typeid_cast<const ColumnString &>(*data); - const ColumnString::Chars & src_chars = src_string.get_chars(); - const Offsets & src_string_offsets = src_string.get_offsets(); - const Offsets & src_offsets = get_offsets(); + const ColumnString& src_string = typeid_cast<const ColumnString&>(*data); + const ColumnString::Chars& src_chars = src_string.get_chars(); + const Offsets& src_string_offsets = src_string.get_offsets(); + const Offsets& src_offsets = get_offsets(); - ColumnString::Chars & res_chars = typeid_cast<ColumnString &>(res->get_data()).get_chars(); - Offsets & res_string_offsets = typeid_cast<ColumnString &>(res->get_data()).get_offsets(); - Offsets & res_offsets = res->get_offsets(); + ColumnString::Chars& res_chars = typeid_cast<ColumnString&>(res->get_data()).get_chars(); + Offsets& res_string_offsets = typeid_cast<ColumnString&>(res->get_data()).get_offsets(); + Offsets& res_offsets = res->get_offsets(); if (result_size_hint < 0) { res_chars.reserve(src_chars.size()); @@ -389,13 +392,16 @@ ColumnPtr ColumnArray::filter_string(const Filter & filt, ssize_t result_size_hi if (filt[i]) { /// If the array is not empty - copy content. if (array_size) { - size_t chars_to_copy = src_string_offsets[array_size + prev_src_offset - 1] - prev_src_string_offset; + size_t chars_to_copy = src_string_offsets[array_size + prev_src_offset - 1] - + prev_src_string_offset; size_t res_chars_prev_size = res_chars.size(); res_chars.resize(res_chars_prev_size + chars_to_copy); - memcpy(&res_chars[res_chars_prev_size], &src_chars[prev_src_string_offset], chars_to_copy); + memcpy(&res_chars[res_chars_prev_size], &src_chars[prev_src_string_offset], + chars_to_copy); for (size_t j = 0; j < array_size; ++j) - res_string_offsets.push_back(src_string_offsets[j + prev_src_offset] + prev_res_string_offset - prev_src_string_offset); + res_string_offsets.push_back(src_string_offsets[j + prev_src_offset] + + prev_res_string_offset - prev_src_string_offset); prev_res_string_offset = res_string_offsets.back(); } @@ -413,13 +419,11 @@ ColumnPtr ColumnArray::filter_string(const Filter & filt, ssize_t result_size_hi return res; } -ColumnPtr ColumnArray::filter_generic(const Filter & filt, ssize_t result_size_hint) const { +ColumnPtr ColumnArray::filter_generic(const Filter& filt, ssize_t result_size_hint) const { size_t size = get_offsets().size(); - if (size != filt.size()) - LOG(FATAL) << "Size of filter doesn't match size of column."; + if (size != filt.size()) LOG(FATAL) << "Size of filter doesn't match size of column."; - if (size == 0) - return ColumnArray::create(data); + if (size == 0) return ColumnArray::create(data); Filter nested_filt(get_offsets().back()); for (size_t i = 0; i < size; ++i) { @@ -434,19 +438,18 @@ ColumnPtr ColumnArray::filter_generic(const Filter & filt, ssize_t result_size_h ssize_t nested_result_size_hint = 0; if (result_size_hint < 0) nested_result_size_hint = result_size_hint; - else if (result_size_hint && result_size_hint < 1000000000 && data->size() < 1000000000) /// Avoid overflow. - nested_result_size_hint = result_size_hint * data->size() / size; + else if (result_size_hint && result_size_hint < 1000000000 && + data->size() < 1000000000) /// Avoid overflow. + nested_result_size_hint = result_size_hint * data->size() / size; res->data = data->filter(nested_filt, nested_result_size_hint); - Offsets & res_offsets = res->get_offsets(); - if (result_size_hint) - res_offsets.reserve(result_size_hint > 0 ? result_size_hint : size); + Offsets& res_offsets = res->get_offsets(); + if (result_size_hint) res_offsets.reserve(result_size_hint > 0 ? result_size_hint : size); size_t current_offset = 0; for (size_t i = 0; i < size; ++i) { - if (filt[i]) - { + if (filt[i]) { current_offset += size_at(i); res_offsets.push_back(current_offset); } @@ -455,29 +458,29 @@ ColumnPtr ColumnArray::filter_generic(const Filter & filt, ssize_t result_size_h return res; } -ColumnPtr ColumnArray::filter_nullable(const Filter & filt, ssize_t result_size_hint) const { - if (get_offsets().empty()) - return ColumnArray::create(data); +ColumnPtr ColumnArray::filter_nullable(const Filter& filt, ssize_t result_size_hint) const { + if (get_offsets().empty()) return ColumnArray::create(data); - const ColumnNullable & nullable_elems = assert_cast<const ColumnNullable &>(*data); + const ColumnNullable& nullable_elems = assert_cast<const ColumnNullable&>(*data); auto array_of_nested = ColumnArray::create(nullable_elems.get_nested_column_ptr(), offsets); auto filtered_array_of_nested_owner = array_of_nested->filter(filt, result_size_hint); - const auto & filtered_array_of_nested = assert_cast<const ColumnArray &>(*filtered_array_of_nested_owner); - const auto & filtered_offsets = filtered_array_of_nested.get_offsets_ptr(); + const auto& filtered_array_of_nested = + assert_cast<const ColumnArray&>(*filtered_array_of_nested_owner); + const auto& filtered_offsets = filtered_array_of_nested.get_offsets_ptr(); auto res_null_map = ColumnUInt8::create(); - filter_arrays_impl_only_data(nullable_elems.get_null_map_data(), get_offsets(), res_null_map->get_data(), filt, result_size_hint); + filter_arrays_impl_only_data(nullable_elems.get_null_map_data(), get_offsets(), + res_null_map->get_data(), filt, result_size_hint); - return ColumnArray::create( - ColumnNullable::create( - filtered_array_of_nested.get_data_ptr(), - std::move(res_null_map)), - filtered_offsets); + return ColumnArray::create(ColumnNullable::create(filtered_array_of_nested.get_data_ptr(), + std::move(res_null_map)), + filtered_offsets); } -void ColumnArray::insert_indices_from(const IColumn& src, const int* indices_begin, const int* indices_end) { +void ColumnArray::insert_indices_from(const IColumn& src, const int* indices_begin, + const int* indices_end) { for (auto x = indices_begin; x != indices_end; ++x) { if (*x == -1) { ColumnArray::insert_default(); @@ -487,45 +490,56 @@ void ColumnArray::insert_indices_from(const IColumn& src, const int* indices_beg } } -ColumnPtr ColumnArray::replicate(const Offsets & replicate_offsets) const { - if (replicate_offsets.empty()) - return clone_empty(); - - if (typeid_cast<const ColumnUInt8 *>(data.get())) return replicate_number<UInt8>(replicate_offsets); - if (typeid_cast<const ColumnUInt16 *>(data.get())) return replicate_number<UInt16>(replicate_offsets); - if (typeid_cast<const ColumnUInt32 *>(data.get())) return replicate_number<UInt32>(replicate_offsets); - if (typeid_cast<const ColumnUInt64 *>(data.get())) return replicate_number<UInt64>(replicate_offsets); - if (typeid_cast<const ColumnInt8 *>(data.get())) return replicate_number<Int8>(replicate_offsets); - if (typeid_cast<const ColumnInt16 *>(data.get())) return replicate_number<Int16>(replicate_offsets); - if (typeid_cast<const ColumnInt32 *>(data.get())) return replicate_number<Int32>(replicate_offsets); - if (typeid_cast<const ColumnInt64 *>(data.get())) return replicate_number<Int64>(replicate_offsets); - if (typeid_cast<const ColumnFloat32 *>(data.get())) return replicate_number<Float32>(replicate_offsets); - if (typeid_cast<const ColumnFloat64 *>(data.get())) return replicate_number<Float64>(replicate_offsets); - if (typeid_cast<const ColumnString *>(data.get())) return replicate_string(replicate_offsets); - if (typeid_cast<const ColumnConst *>(data.get())) return replicate_const(replicate_offsets); - if (typeid_cast<const ColumnNullable *>(data.get())) return replicate_nullable(replicate_offsets); +ColumnPtr ColumnArray::replicate(const Offsets& replicate_offsets) const { + if (replicate_offsets.empty()) return clone_empty(); + + if (typeid_cast<const ColumnUInt8*>(data.get())) + return replicate_number<UInt8>(replicate_offsets); + if (typeid_cast<const ColumnUInt16*>(data.get())) + return replicate_number<UInt16>(replicate_offsets); + if (typeid_cast<const ColumnUInt32*>(data.get())) + return replicate_number<UInt32>(replicate_offsets); + if (typeid_cast<const ColumnUInt64*>(data.get())) + return replicate_number<UInt64>(replicate_offsets); + if (typeid_cast<const ColumnInt8*>(data.get())) + return replicate_number<Int8>(replicate_offsets); + if (typeid_cast<const ColumnInt16*>(data.get())) + return replicate_number<Int16>(replicate_offsets); + if (typeid_cast<const ColumnInt32*>(data.get())) + return replicate_number<Int32>(replicate_offsets); + if (typeid_cast<const ColumnInt64*>(data.get())) + return replicate_number<Int64>(replicate_offsets); + if (typeid_cast<const ColumnFloat32*>(data.get())) + return replicate_number<Float32>(replicate_offsets); + if (typeid_cast<const ColumnFloat64*>(data.get())) + return replicate_number<Float64>(replicate_offsets); + if (typeid_cast<const ColumnString*>(data.get())) return replicate_string(replicate_offsets); + if (typeid_cast<const ColumnConst*>(data.get())) return replicate_const(replicate_offsets); + if (typeid_cast<const ColumnNullable*>(data.get())) + return replicate_nullable(replicate_offsets); //if (typeid_cast<const ColumnTuple *>(data.get())) return replicateTuple(replicate_offsets); return replicate_generic(replicate_offsets); } template <typename T> -ColumnPtr ColumnArray::replicate_number(const Offsets & replicate_offsets) const { +ColumnPtr ColumnArray::replicate_number(const Offsets& replicate_offsets) const { size_t col_size = size(); if (col_size != replicate_offsets.size()) LOG(FATAL) << "Size of offsets doesn't match size of column."; MutableColumnPtr res = clone_empty(); - if (0 == col_size) - return res; + if (0 == col_size) return res; - ColumnArray & res_arr = typeid_cast<ColumnArray &>(*res); + ColumnArray& res_arr = typeid_cast<ColumnArray&>(*res); - const typename ColumnVector<T>::Container & src_data = typeid_cast<const ColumnVector<T> &>(*data).get_data(); - const Offsets & src_offsets = get_offsets(); + const typename ColumnVector<T>::Container& src_data = + typeid_cast<const ColumnVector<T>&>(*data).get_data(); + const Offsets& src_offsets = get_offsets(); - typename ColumnVector<T>::Container & res_data = typeid_cast<ColumnVector<T> &>(res_arr.get_data()).get_data(); - Offsets & res_offsets = res_arr.get_offsets(); + typename ColumnVector<T>::Container& res_data = + typeid_cast<ColumnVector<T>&>(res_arr.get_data()).get_data(); + Offsets& res_offsets = res_arr.get_offsets(); res_data.reserve(data->size() / col_size * replicate_offsets.back()); res_offsets.reserve(replicate_offsets.back()); @@ -544,7 +558,8 @@ ColumnPtr ColumnArray::replicate_number(const Offsets & replicate_offsets) const if (value_size) { res_data.resize(res_data.size() + value_size); - memcpy(&res_data[res_data.size() - value_size], &src_data[prev_data_offset], value_size * sizeof(T)); + memcpy(&res_data[res_data.size() - value_size], &src_data[prev_data_offset], + value_size * sizeof(T)); } } @@ -555,26 +570,25 @@ ColumnPtr ColumnArray::replicate_number(const Offsets & replicate_offsets) const return res; } -ColumnPtr ColumnArray::replicate_string(const Offsets & replicate_offsets) const { +ColumnPtr ColumnArray::replicate_string(const Offsets& replicate_offsets) const { size_t col_size = size(); if (col_size != replicate_offsets.size()) LOG(FATAL) << "Size of offsets doesn't match size of column."; MutableColumnPtr res = clone_empty(); - if (0 == col_size) - return res; + if (0 == col_size) return res; - ColumnArray & res_arr = assert_cast<ColumnArray &>(*res); + ColumnArray& res_arr = assert_cast<ColumnArray&>(*res); - const ColumnString & src_string = typeid_cast<const ColumnString &>(*data); - const ColumnString::Chars & src_chars = src_string.get_chars(); - const Offsets & src_string_offsets = src_string.get_offsets(); - const Offsets & src_offsets = get_offsets(); + const ColumnString& src_string = typeid_cast<const ColumnString&>(*data); + const ColumnString::Chars& src_chars = src_string.get_chars(); + const Offsets& src_string_offsets = src_string.get_offsets(); + const Offsets& src_offsets = get_offsets(); - ColumnString::Chars & res_chars = typeid_cast<ColumnString &>(res_arr.get_data()).get_chars(); - Offsets & res_string_offsets = typeid_cast<ColumnString &>(res_arr.get_data()).get_offsets(); - Offsets & res_offsets = res_arr.get_offsets(); + ColumnString::Chars& res_chars = typeid_cast<ColumnString&>(res_arr.get_data()).get_chars(); + Offsets& res_string_offsets = typeid_cast<ColumnString&>(res_arr.get_data()).get_offsets(); + Offsets& res_offsets = res_arr.get_offsets(); res_chars.reserve(src_chars.size() / col_size * replicate_offsets.back()); res_string_offsets.reserve(src_string_offsets.size() / col_size * replicate_offsets.back()); @@ -594,7 +608,8 @@ ColumnPtr ColumnArray::replicate_string(const Offsets & replicate_offsets) const /// The number of strings in the array. size_t value_size = src_offsets[i] - prev_src_offset; /// Number of characters in strings of the array, including zero bytes. - size_t sum_chars_size = src_string_offsets[prev_src_offset + value_size - 1] - prev_src_string_offset; /// -1th index is Ok, see PaddedPODArray. + size_t sum_chars_size = src_string_offsets[prev_src_offset + value_size - 1] - + prev_src_string_offset; /// -1th index is Ok, see PaddedPODArray. for (size_t j = 0; j < size_to_replicate; ++j) { current_res_offset += value_size; @@ -603,7 +618,8 @@ ColumnPtr ColumnArray::replicate_string(const Offsets & replicate_offsets) const size_t prev_src_string_offset_local = prev_src_string_offset; for (size_t k = 0; k < value_size; ++k) { /// Size of single string. - size_t chars_size = src_string_offsets[k + prev_src_offset] - prev_src_string_offset_local; + size_t chars_size = + src_string_offsets[k + prev_src_offset] - prev_src_string_offset_local; current_res_string_offset += chars_size; res_string_offsets.push_back(current_res_string_offset); @@ -615,7 +631,8 @@ ColumnPtr ColumnArray::replicate_string(const Offsets & replicate_offsets) const /// Copies the characters of the array of strings. res_chars.resize(res_chars.size() + sum_chars_size); memcpy_small_allow_read_write_overflow15( - &res_chars[res_chars.size() - sum_chars_size], &src_chars[prev_src_string_offset], sum_chars_size); + &res_chars[res_chars.size() - sum_chars_size], + &src_chars[prev_src_string_offset], sum_chars_size); } } @@ -627,18 +644,17 @@ ColumnPtr ColumnArray::replicate_string(const Offsets & replicate_offsets) const return res; } -ColumnPtr ColumnArray::replicate_const(const Offsets & replicate_offsets) const { +ColumnPtr ColumnArray::replicate_const(const Offsets& replicate_offsets) const { size_t col_size = size(); if (col_size != replicate_offsets.size()) LOG(FATAL) << "Size of offsets doesn't match size of column."; - if (0 == col_size) - return clone_empty(); + if (0 == col_size) return clone_empty(); - const Offsets & src_offsets = get_offsets(); + const Offsets& src_offsets = get_offsets(); auto res_column_offsets = ColumnOffsets::create(); - Offsets & res_offsets = res_column_offsets->get_data(); + Offsets& res_offsets = res_column_offsets->get_data(); res_offsets.reserve(replicate_offsets.back()); Offset prev_replicate_offset = 0; @@ -658,48 +674,81 @@ ColumnPtr ColumnArray::replicate_const(const Offsets & replicate_offsets) const prev_data_offset = src_offsets[i]; } - return ColumnArray::create(get_data().clone_resized(current_new_offset), std::move(res_column_offsets)); + return ColumnArray::create(get_data().clone_resized(current_new_offset), + std::move(res_column_offsets)); } -ColumnPtr ColumnArray::replicate_generic(const Offsets & replicate_offsets) const { +ColumnPtr ColumnArray::replicate_generic(const Offsets& replicate_offsets) const { size_t col_size = size(); if (col_size != replicate_offsets.size()) LOG(FATAL) << "Size of offsets doesn't match size of column."; MutableColumnPtr res = clone_empty(); - ColumnArray & res_concrete = assert_cast<ColumnArray &>(*res); + ColumnArray& res_concrete = assert_cast<ColumnArray&>(*res); - if (0 == col_size) - return res; + if (0 == col_size) return res; IColumn::Offset prev_offset = 0; for (size_t i = 0; i < col_size; ++i) { size_t size_to_replicate = replicate_offsets[i] - prev_offset; prev_offset = replicate_offsets[i]; - for (size_t j = 0; j < size_to_replicate; ++j) - res_concrete.insert_from(*this, i); + for (size_t j = 0; j < size_to_replicate; ++j) res_concrete.insert_from(*this, i); } return res; } -ColumnPtr ColumnArray::replicate_nullable(const Offsets & replicate_offsets) const { - const ColumnNullable & nullable = assert_cast<const ColumnNullable &>(*data); +ColumnPtr ColumnArray::replicate_nullable(const Offsets& replicate_offsets) const { + const ColumnNullable& nullable = assert_cast<const ColumnNullable&>(*data); /// Make temporary arrays for each components of Nullable. Then replicate them independently and collect back to result. /// NOTE Offsets are calculated twice and it is redundant. - auto array_of_nested = ColumnArray(nullable.get_nested_column_ptr()->assume_mutable(), get_offsets_ptr()->assume_mutable()) - .replicate(replicate_offsets); - auto array_of_null_map = ColumnArray(nullable.get_null_map_column_ptr()->assume_mutable(), get_offsets_ptr()->assume_mutable()) - .replicate(replicate_offsets); + auto array_of_nested = ColumnArray(nullable.get_nested_column_ptr()->assume_mutable(), + get_offsets_ptr()->assume_mutable()) + .replicate(replicate_offsets); + auto array_of_null_map = ColumnArray(nullable.get_null_map_column_ptr()->assume_mutable(), + get_offsets_ptr()->assume_mutable()) + .replicate(replicate_offsets); return ColumnArray::create( - ColumnNullable::create( - assert_cast<const ColumnArray &>(*array_of_nested).get_data_ptr(), - assert_cast<const ColumnArray &>(*array_of_null_map).get_data_ptr()), - assert_cast<const ColumnArray &>(*array_of_nested).get_offsets_ptr()); + ColumnNullable::create( + assert_cast<const ColumnArray&>(*array_of_nested).get_data_ptr(), + assert_cast<const ColumnArray&>(*array_of_null_map).get_data_ptr()), + assert_cast<const ColumnArray&>(*array_of_nested).get_offsets_ptr()); +} + +ColumnPtr ColumnArray::permute(const Permutation& perm, size_t limit) const { + size_t size = offsets->size(); + if (limit == 0) { + limit = size; + } else { + limit = std::min(size, limit); + } + if (perm.size() < limit) { + LOG(FATAL) << "Size of permutation is less than required."; + } + if (limit == 0) { + return ColumnArray::create(data); + } + + auto res = ColumnArray::create(data->clone_empty()); + auto& res_offsets = res->get_offsets(); + res_offsets.resize(limit); + + Permutation nested_perm; + nested_perm.reserve(data->size()); + + for (size_t i = 0; i < limit; ++i) { + res_offsets[i] = res_offsets[i - 1] + size_at(perm[i]); + for (size_t j = 0; j < size_at(perm[i]); ++j) { + nested_perm.push_back(offset_at(perm[i]) + j); + } + } + + res->data = data->permute(nested_perm, nested_perm.size()); + return res; } } // namespace doris::vectorized diff --git a/be/src/vec/columns/column_array.h b/be/src/vec/columns/column_array.h index e6567d1c97..74a0805336 100644 --- a/be/src/vec/columns/column_array.h +++ b/be/src/vec/columns/column_array.h @@ -20,11 +20,11 @@ #pragma once -#include "vec/common/arena.h" -#include "vec/common/assert_cast.h" #include "vec/columns/column.h" #include "vec/columns/column_impl.h" #include "vec/columns/column_vector.h" +#include "vec/common/arena.h" +#include "vec/common/assert_cast.h" #include "vec/core/types.h" namespace doris::vectorized { @@ -38,12 +38,12 @@ private: friend class COWHelper<IColumn, ColumnArray>; /** Create an array column with specified values and offsets. */ - ColumnArray(MutableColumnPtr && nested_column, MutableColumnPtr && offsets_column); + ColumnArray(MutableColumnPtr&& nested_column, MutableColumnPtr&& offsets_column); /** Create an empty column of arrays with the type of values as in the column `nested_column` */ - explicit ColumnArray(MutableColumnPtr && nested_column); + explicit ColumnArray(MutableColumnPtr&& nested_column); - ColumnArray(const ColumnArray &) = default; + ColumnArray(const ColumnArray&) = default; public: /** Create immutable column using immutable arguments. This arguments may be shared with other columns. @@ -51,83 +51,87 @@ public: */ using Base = COWHelper<IColumn, ColumnArray>; - static Ptr create(const ColumnPtr & nested_column, const ColumnPtr & offsets_column) { - return ColumnArray::create(nested_column->assume_mutable(), offsets_column->assume_mutable()); + static Ptr create(const ColumnPtr& nested_column, const ColumnPtr& offsets_column) { + return ColumnArray::create(nested_column->assume_mutable(), + offsets_column->assume_mutable()); } - static Ptr create(const ColumnPtr & nested_column) { + static Ptr create(const ColumnPtr& nested_column) { return ColumnArray::create(nested_column->assume_mutable()); } - template <typename ... Args, typename = typename std::enable_if<IsMutableColumns<Args ...>::value>::type> - static MutablePtr create(Args &&... args) { return Base::create(std::forward<Args>(args)...); } + template <typename... Args, + typename = typename std::enable_if<IsMutableColumns<Args...>::value>::type> + static MutablePtr create(Args&&... args) { + return Base::create(std::forward<Args>(args)...); + } /** On the index i there is an offset to the beginning of the i + 1 -th element. */ using ColumnOffsets = ColumnVector<Offset>; std::string get_name() const override; - const char * get_family_name() const override { return "Array"; } + const char* get_family_name() const override { return "Array"; } bool can_be_inside_nullable() const override { return true; } TypeIndex get_data_type() const { return TypeIndex::Array; } MutableColumnPtr clone_resized(size_t size) const override; size_t size() const override; Field operator[](size_t n) const override; - void get(size_t n, Field & res) const override; + void get(size_t n, Field& res) const override; StringRef get_data_at(size_t n) const override; bool is_default_at(size_t n) const override; - void insert_data(const char * pos, size_t length) override; - StringRef serialize_value_into_arena(size_t n, Arena & arena, char const *& begin) const override; - const char * deserialize_and_insert_from_arena(const char * pos) override; - void update_hash_with_value(size_t n, SipHash & hash) const override; - void insert_range_from(const IColumn & src, size_t start, size_t length) override; - void insert(const Field & x) override; - void insert_from(const IColumn & src_, size_t n) override; + void insert_data(const char* pos, size_t length) override; + StringRef serialize_value_into_arena(size_t n, Arena& arena, char const*& begin) const override; + const char* deserialize_and_insert_from_arena(const char* pos) override; + void update_hash_with_value(size_t n, SipHash& hash) const override; + void insert_range_from(const IColumn& src, size_t start, size_t length) override; + void insert(const Field& x) override; + void insert_from(const IColumn& src_, size_t n) override; void insert_default() override; void pop_back(size_t n) override; - ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override; - [[noreturn]] ColumnPtr permute(const Permutation & perm, size_t limit) const override { - LOG(FATAL) << "permute not implemented"; - } + ColumnPtr filter(const Filter& filt, ssize_t result_size_hint) const override; + ColumnPtr permute(const Permutation& perm, size_t limit) const override; //ColumnPtr index(const IColumn & indexes, size_t limit) const; //template <typename Type> ColumnPtr index_impl(const PaddedPODArray<Type> & indexes, size_t limit) const; - [[noreturn]] int compare_at(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override { + [[noreturn]] int compare_at(size_t n, size_t m, const IColumn& rhs_, + int nan_direction_hint) const override { LOG(FATAL) << "compare_at not implemented"; } - [[noreturn]] void get_permutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override { + [[noreturn]] void get_permutation(bool reverse, size_t limit, int nan_direction_hint, + Permutation& res) const override { LOG(FATAL) << "get_permutation not implemented"; } void reserve(size_t n) override; size_t byte_size() const override; size_t allocated_bytes() const override; void protect() override; - ColumnPtr replicate(const Offsets & replicate_offsets) const override; + ColumnPtr replicate(const Offsets& replicate_offsets) const override; ColumnPtr convert_to_full_column_if_const() const override; - void get_extremes(Field & min, Field & max) const override { + void get_extremes(Field& min, Field& max) const override { LOG(FATAL) << "get_extremes not implemented"; } /** More efficient methods of manipulation */ - IColumn & get_data() { return *data; } - const IColumn & get_data() const { return *data; } + IColumn& get_data() { return *data; } + const IColumn& get_data() const { return *data; } - IColumn & get_offsets_column() { return *offsets; } - const IColumn & get_offsets_column() const { return *offsets; } + IColumn& get_offsets_column() { return *offsets; } + const IColumn& get_offsets_column() const { return *offsets; } - Offsets & ALWAYS_INLINE get_offsets() { - return assert_cast<ColumnOffsets &>(*offsets).get_data(); + Offsets& ALWAYS_INLINE get_offsets() { + return assert_cast<ColumnOffsets&>(*offsets).get_data(); } - const Offsets & ALWAYS_INLINE get_offsets() const { - return assert_cast<const ColumnOffsets &>(*offsets).get_data(); + const Offsets& ALWAYS_INLINE get_offsets() const { + return assert_cast<const ColumnOffsets&>(*offsets).get_data(); } - const ColumnPtr & get_data_ptr() const { return data; } - ColumnPtr & get_data_ptr() { return data; } + const ColumnPtr& get_data_ptr() const { return data; } + ColumnPtr& get_data_ptr() { return data; } - const ColumnPtr & get_offsets_ptr() const { return offsets; } - ColumnPtr & get_offsets_ptr() { return offsets; } + const ColumnPtr& get_offsets_ptr() const { return offsets; } + ColumnPtr& get_offsets_ptr() { return offsets; } - MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override { + MutableColumns scatter(ColumnIndex num_columns, const Selector& selector) const override { return scatter_impl<ColumnArray>(num_columns, selector); } @@ -136,7 +140,8 @@ public: callback(data); } - void insert_indices_from(const IColumn& src, const int* indices_begin, const int* indices_end) override; + void insert_indices_from(const IColumn& src, const int* indices_begin, + const int* indices_end) override; void replace_column_data(const IColumn&, size_t row, size_t self_row = 0) override { LOG(FATAL) << "replace_column_data not implemented"; @@ -154,36 +159,36 @@ private: WrappedPtr offsets; size_t ALWAYS_INLINE offset_at(ssize_t i) const { return get_offsets()[i - 1]; } - size_t ALWAYS_INLINE size_at(ssize_t i) const { return get_offsets()[i] - get_offsets()[i - 1]; } - + size_t ALWAYS_INLINE size_at(ssize_t i) const { + return get_offsets()[i] - get_offsets()[i - 1]; + } /// Multiply values if the nested column is ColumnVector<T>. template <typename T> - ColumnPtr replicate_number(const Offsets & replicate_offsets) const; + ColumnPtr replicate_number(const Offsets& replicate_offsets) const; /// Multiply the values if the nested column is ColumnString. The code is too complicated. - ColumnPtr replicate_string(const Offsets & replicate_offsets) const; + ColumnPtr replicate_string(const Offsets& replicate_offsets) const; /** Non-constant arrays of constant values are quite rare. * Most functions can not work with them, and does not create such columns as a result. * An exception is the function `replicate` (see FunctionsMiscellaneous.h), which has service meaning for the implementation of lambda functions. * Only for its sake is the implementation of the `replicate` method for ColumnArray(ColumnConst). */ - ColumnPtr replicate_const(const Offsets & replicate_offsets) const; + ColumnPtr replicate_const(const Offsets& replicate_offsets) const; /** The following is done by simply replicating of nested columns. */ - ColumnPtr replicate_nullable(const Offsets & replicate_offsets) const; - ColumnPtr replicate_generic(const Offsets & replicate_offsets) const; - + ColumnPtr replicate_nullable(const Offsets& replicate_offsets) const; + ColumnPtr replicate_generic(const Offsets& replicate_offsets) const; /// Specializations for the filter function. template <typename T> - ColumnPtr filter_number(const Filter & filt, ssize_t result_size_hint) const; + ColumnPtr filter_number(const Filter& filt, ssize_t result_size_hint) const; - ColumnPtr filter_string(const Filter & filt, ssize_t result_size_hint) const; - ColumnPtr filter_nullable(const Filter & filt, ssize_t result_size_hint) const; - ColumnPtr filter_generic(const Filter & filt, ssize_t result_size_hint) const; + ColumnPtr filter_string(const Filter& filt, ssize_t result_size_hint) const; + ColumnPtr filter_nullable(const Filter& filt, ssize_t result_size_hint) const; + ColumnPtr filter_generic(const Filter& filt, ssize_t result_size_hint) const; }; } // namespace doris::vectorized diff --git a/be/test/vec/core/column_array_test.cpp b/be/test/vec/core/column_array_test.cpp index a65c7d8c0b..b497b14451 100644 --- a/be/test/vec/core/column_array_test.cpp +++ b/be/test/vec/core/column_array_test.cpp @@ -28,6 +28,34 @@ namespace doris::vectorized { +void check_array_offsets(ColumnPtr arr, const std::vector<IColumn::Offset>& offs) { + auto arr_col = check_and_get_column<ColumnArray>(*arr); + ASSERT_EQ(arr_col->size(), offs.size()); + for (size_t i = 0; i < arr_col->size(); ++i) { + ASSERT_EQ(arr_col->get_offsets()[i], offs[i]); + } +} +template <typename T> +void check_array_data(ColumnPtr arr, const std::vector<T>& data) { + auto arr_col = check_and_get_column<ColumnArray>(*arr); + auto data_col = arr_col->get_data_ptr(); + ASSERT_EQ(data_col->size(), data.size()); + for (size_t i = 0; i < data_col->size(); ++i) { + auto element = data_col->get_data_at(i); + ASSERT_EQ(*((T*)element.data), data[i]); + } +} +template <> +void check_array_data(ColumnPtr arr, const std::vector<std::string>& data) { + auto arr_col = check_and_get_column<ColumnArray>(*arr); + auto data_col = arr_col->get_data_ptr(); + ASSERT_EQ(data_col->size(), data.size()); + for (size_t i = 0; i < data_col->size(); ++i) { + auto element = data_col->get_data_at(i); + ASSERT_EQ(std::string(element.data), data[i]); + } +} + TEST(ColumnArrayTest, IntArrayTest) { auto off_column = ColumnVector<IColumn::Offset>::create(); auto data_column = ColumnVector<int32_t>::create(); @@ -78,4 +106,77 @@ TEST(ColumnArrayTest, StringArrayTest) { } } +TEST(ColumnArrayTest, IntArrayPermuteTest) { + auto off_column = ColumnVector<IColumn::Offset>::create(); + auto data_column = ColumnVector<int32_t>::create(); + // init column array with [[1,2,3],[],[4],[5,6]] + std::vector<IColumn::Offset> offs = {0, 3, 3, 4, 6}; + std::vector<int32_t> vals = {1, 2, 3, 4, 5, 6}; + for (size_t i = 1; i < offs.size(); ++i) { + off_column->insert_data((const char*)(&offs[i]), 0); + } + for (auto& v : vals) { + data_column->insert_data((const char*)(&v), 0); + } + ColumnArray array_column(std::move(data_column), std::move(off_column)); + + IColumn::Permutation perm = {3, 2, 1, 0}; + // return array column: [[5,6],[4]]; + auto res1 = array_column.permute(perm, 2); + check_array_offsets(res1, {2, 3}); + check_array_data<int32_t>(res1, {5, 6, 4}); + + // return array column: [[5,6],[4],[],[1,2,3]] + auto res2 = array_column.permute(perm, 0); + check_array_offsets(res2, {2, 3, 3, 6}); + check_array_data<int32_t>(res2, {5, 6, 4, 1, 2, 3}); +} + +TEST(ColumnArrayTest, StringArrayPermuteTest) { + auto off_column = ColumnVector<IColumn::Offset>::create(); + auto data_column = ColumnString::create(); + // init column array with [["abc","d"],["ef"],[], [""]]; + std::vector<IColumn::Offset> offs = {0, 2, 3, 3, 4}; + std::vector<std::string> vals = {"abc", "d", "ef", ""}; + for (size_t i = 1; i < offs.size(); ++i) { + off_column->insert_data((const char*)(&offs[i]), 0); + } + for (auto& v : vals) { + data_column->insert_data(v.data(), v.size()); + } + ColumnArray array_column(std::move(data_column), std::move(off_column)); + + IColumn::Permutation perm = {3, 2, 1, 0}; + // return array column: [[""],[]]; + auto res1 = array_column.permute(perm, 2); + check_array_offsets(res1, {1, 1}); + check_array_data<std::string>(res1, {""}); +} + +TEST(ColumnArrayTest, EmptyArrayPermuteTest) { + auto off_column = ColumnVector<IColumn::Offset>::create(); + auto data_column = ColumnVector<int32_t>::create(); + // init column array with [[],[],[],[]] + std::vector<IColumn::Offset> offs = {0, 0, 0, 0, 0}; + std::vector<int32_t> vals = {}; + for (size_t i = 1; i < offs.size(); ++i) { + off_column->insert_data((const char*)(&offs[i]), 0); + } + for (auto& v : vals) { + data_column->insert_data((const char*)(&v), 0); + } + ColumnArray array_column(std::move(data_column), std::move(off_column)); + + IColumn::Permutation perm = {3, 2, 1, 0}; + // return array column: [[],[]]; + auto res1 = array_column.permute(perm, 2); + check_array_offsets(res1, {0, 0}); + check_array_data<int32_t>(res1, {}); + + // return array column: [[],[],[],[]] + auto res2 = array_column.permute(perm, 0); + check_array_offsets(res2, {0, 0, 0, 0}); + check_array_data<int32_t>(res2, {}); +} + } // namespace doris::vectorized --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org