This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch dev-1.0.1 in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
commit 2e46b373c203c945a4e4f27f412e7fa6dcc652ce Author: dataroaring <98214048+dataroar...@users.noreply.github.com> AuthorDate: Fri Mar 25 14:38:27 2022 +0800 row num is more accurate than column num in data_types (#8628) --- .../aggregate_function_stddev.h | 2 ++ be/src/vec/data_types/data_type_bitmap.cpp | 30 +++++++++++----------- be/src/vec/data_types/data_type_decimal.cpp | 24 ++++++++--------- be/src/vec/data_types/data_type_hll.cpp | 30 +++++++++++----------- be/src/vec/data_types/data_type_nullable.cpp | 14 +++++----- be/src/vec/data_types/data_type_number_base.cpp | 22 ++++++++-------- be/src/vec/data_types/data_type_string.cpp | 14 +++++----- 7 files changed, 69 insertions(+), 67 deletions(-) diff --git a/be/src/vec/aggregate_functions/aggregate_function_stddev.h b/be/src/vec/aggregate_functions/aggregate_function_stddev.h index 50c4064..83c4041 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_stddev.h +++ b/be/src/vec/aggregate_functions/aggregate_function_stddev.h @@ -28,6 +28,7 @@ namespace doris::vectorized { template <typename T, bool is_stddev> struct BaseData { BaseData() : mean(0.0), m2(0.0), count(0) {} + virtual ~BaseData() {} void write(BufferWritable& buf) const { write_binary(mean, buf); @@ -102,6 +103,7 @@ struct BaseData { template <bool is_stddev> struct BaseDatadecimal { BaseDatadecimal() : mean(0), m2(0), count(0) {} + virtual ~BaseDatadecimal() {} void write(BufferWritable& buf) const { write_binary(mean, buf); diff --git a/be/src/vec/data_types/data_type_bitmap.cpp b/be/src/vec/data_types/data_type_bitmap.cpp index 88a0837..97f34ac 100644 --- a/be/src/vec/data_types/data_type_bitmap.cpp +++ b/be/src/vec/data_types/data_type_bitmap.cpp @@ -24,7 +24,7 @@ namespace doris::vectorized { // binary: <size array> | <bitmap array> -// <size array>: column num | bitmap1 size | bitmap2 size | ... +// <size array>: row num | bitmap1 size | bitmap2 size | ... // <bitmap array>: bitmap1 | bitmap2 | ... 
int64_t DataTypeBitMap::get_uncompressed_serialized_bytes(const IColumn& column) const { auto ptr = column.convert_to_full_column_if_const(); @@ -44,19 +44,19 @@ char* DataTypeBitMap::serialize(const IColumn& column, char* buf) const { auto ptr = column.convert_to_full_column_if_const(); auto& data_column = assert_cast<const ColumnBitmap&>(*ptr); - // serialize the bitmap size array, column num saves at index 0 - const auto column_num = column.size(); - size_t bitmap_size_array[column_num + 1]; - bitmap_size_array[0] = column_num; - for (size_t i = 0; i < column.size(); ++i) { + // serialize the bitmap size array, row num saves at index 0 + const auto row_num = column.size(); + size_t bitmap_size_array[row_num + 1]; + bitmap_size_array[0] = row_num; + for (size_t i = 0; i < row_num; ++i) { auto& bitmap = const_cast<BitmapValue&>(data_column.get_element(i)); bitmap_size_array[i + 1] = bitmap.getSizeInBytes(); } - auto allocate_len_size = sizeof(size_t) * (column_num + 1); + auto allocate_len_size = sizeof(size_t) * (row_num + 1); memcpy(buf, bitmap_size_array, allocate_len_size); buf += allocate_len_size; // serialize each bitmap - for (size_t i = 0; i < column_num; ++i) { + for (size_t i = 0; i < row_num; ++i) { auto& bitmap = const_cast<BitmapValue&>(data_column.get_element(i)); bitmap.write(buf); buf += bitmap_size_array[i + 1]; @@ -70,18 +70,18 @@ const char* DataTypeBitMap::deserialize(const char* buf, IColumn* column) const auto& data = data_column.get_data(); // deserialize the bitmap size array - size_t column_num = *reinterpret_cast<const size_t*>(buf); + size_t row_num = *reinterpret_cast<const size_t*>(buf); buf += sizeof(size_t); - size_t bitmap_size_array[column_num]; - memcpy(bitmap_size_array, buf, sizeof(size_t) * column_num); - buf += sizeof(size_t) * column_num; + size_t bitmap_size_array[row_num]; + memcpy(bitmap_size_array, buf, sizeof(size_t) * row_num); + buf += sizeof(size_t) * row_num; // deserialize each bitmap - data.resize(column_num); - 
for (int i = 0; i < column_num ; ++i) { + data.resize(row_num); + for (int i = 0; i < row_num ; ++i) { data[i].deserialize(buf); buf += bitmap_size_array[i]; } - + return buf; } diff --git a/be/src/vec/data_types/data_type_decimal.cpp b/be/src/vec/data_types/data_type_decimal.cpp index dee7955..8cd97a3 100644 --- a/be/src/vec/data_types/data_type_decimal.cpp +++ b/be/src/vec/data_types/data_type_decimal.cpp @@ -62,7 +62,7 @@ void DataTypeDecimal<T>::to_string(const IColumn& column, size_t row_num, ostr.write(str.data(), str.size()); } -// binary: column_num | value1 | value2 | ... +// binary: row_num | value1 | value2 | ... template <typename T> int64_t DataTypeDecimal<T>::get_uncompressed_serialized_bytes(const IColumn& column) const { return sizeof(uint32_t) + column.size() * sizeof(FieldType); @@ -70,28 +70,28 @@ int64_t DataTypeDecimal<T>::get_uncompressed_serialized_bytes(const IColumn& col template <typename T> char* DataTypeDecimal<T>::serialize(const IColumn& column, char* buf) const { - // column num - const auto column_num = column.size(); - *reinterpret_cast<uint32_t*>(buf) = column_num; - buf += sizeof(uint32_t); + // row num + const auto row_num = column.size(); + *reinterpret_cast<uint32_t*>(buf) = row_num; + buf += sizeof(uint32_t); // column values auto ptr = column.convert_to_full_column_if_const(); const auto* origin_data = assert_cast<const ColumnType&>(*ptr.get()).get_data().data(); - memcpy(buf, origin_data, column_num * sizeof(FieldType)); - buf += column_num * sizeof(FieldType); + memcpy(buf, origin_data, row_num * sizeof(FieldType)); + buf += row_num * sizeof(FieldType); return buf; } template <typename T> const char* DataTypeDecimal<T>::deserialize(const char* buf, IColumn* column) const { - // column num - uint32_t column_num = *reinterpret_cast<const uint32_t*>(buf); + // row num + uint32_t row_num = *reinterpret_cast<const uint32_t*>(buf); buf += sizeof(uint32_t); // column values auto& container = 
assert_cast<ColumnType*>(column)->get_data(); - container.resize(column_num); - memcpy(container.data(), buf, column_num * sizeof(FieldType)); - buf += column_num * sizeof(FieldType); + container.resize(row_num); + memcpy(container.data(), buf, row_num * sizeof(FieldType)); + buf += row_num * sizeof(FieldType); return buf; } diff --git a/be/src/vec/data_types/data_type_hll.cpp b/be/src/vec/data_types/data_type_hll.cpp index b9d6c5b..fde7a2a 100644 --- a/be/src/vec/data_types/data_type_hll.cpp +++ b/be/src/vec/data_types/data_type_hll.cpp @@ -23,22 +23,22 @@ namespace doris::vectorized { -// Two part of binary: <column num > + <size array> | <hll data array> -// first: column num | hll1 size | hll2 size | ... +// Two part of binary: <row num > + <size array> | <hll data array> +// first: row num | hll1 size | hll2 size | ... // second: hll1 | hll2 | ... char* DataTypeHLL::serialize(const IColumn& column, char* buf) const { auto ptr = column.convert_to_full_column_if_const(); auto& data_column = assert_cast<const ColumnHLL&>(*ptr); - size_t column_num = column.size(); - size_t hll_size_array[column_num + 1]; - hll_size_array[0] = column_num; + size_t row_num = column.size(); + size_t hll_size_array[row_num + 1]; + hll_size_array[0] = row_num; - auto allocate_len_size = sizeof(size_t) * (column_num + 1); + auto allocate_len_size = sizeof(size_t) * (row_num + 1); char* buf_start = buf; buf += allocate_len_size; - for (size_t i = 0; i < column_num; ++i) { + for (size_t i = 0; i < row_num; ++i) { auto& hll = const_cast<HyperLogLog&>(data_column.get_element(i)); size_t actual_size = hll.serialize(reinterpret_cast<uint8_t*>(buf)); hll_size_array[i + 1] = actual_size; @@ -49,21 +49,21 @@ char* DataTypeHLL::serialize(const IColumn& column, char* buf) const { return buf; } -// Two part of binary: <column num > + <size array> | <hll data array> -// first: column num | hll1 size | hll2 size | ... 
+// Two part of binary: <row num > + <size array> | <hll data array> +// first: row num | hll1 size | hll2 size | ... // second: hll1 | hll2 | ... const char* DataTypeHLL::deserialize(const char* buf, IColumn* column) const { auto& data_column = assert_cast<ColumnHLL&>(*column); auto& data = data_column.get_data(); - size_t column_num = *reinterpret_cast<const size_t*>(buf); + size_t row_num = *reinterpret_cast<const size_t*>(buf); buf += sizeof(size_t); - size_t hll_size_array[column_num]; - memcpy(hll_size_array, buf, sizeof(size_t) * column_num); - buf += sizeof(size_t) * column_num; + size_t hll_size_array[row_num]; + memcpy(hll_size_array, buf, sizeof(size_t) * row_num); + buf += sizeof(size_t) * row_num; - data.resize(column_num); - for (int i = 0; i < column_num; ++i) { + data.resize(row_num); + for (int i = 0; i < row_num; ++i) { data[i].deserialize(Slice(buf, hll_size_array[i])); buf += hll_size_array[i]; } diff --git a/be/src/vec/data_types/data_type_nullable.cpp b/be/src/vec/data_types/data_type_nullable.cpp index 9adfaaf..52a7f02 100644 --- a/be/src/vec/data_types/data_type_nullable.cpp +++ b/be/src/vec/data_types/data_type_nullable.cpp @@ -53,7 +53,7 @@ std::string DataTypeNullable::to_string(const IColumn& column, size_t row_num) c } } -// binary: column num | <null array> | <values array> +// binary: row num | <null array> | <values array> // <null array>: is_null1 | is_null2 | ... 
// <values array>: value1 | value2 | ...> int64_t DataTypeNullable::get_uncompressed_serialized_bytes(const IColumn& column) const { @@ -68,7 +68,7 @@ char* DataTypeNullable::serialize(const IColumn& column, char* buf) const { auto ptr = column.convert_to_full_column_if_const(); const ColumnNullable& col = assert_cast<const ColumnNullable&>(*ptr.get()); - // column num + // row num *reinterpret_cast<uint32_t*>(buf) = column.size(); buf += sizeof(uint32_t); // null flags @@ -80,13 +80,13 @@ char* DataTypeNullable::serialize(const IColumn& column, char* buf) const { const char* DataTypeNullable::deserialize(const char* buf, IColumn* column) const { ColumnNullable* col = assert_cast<ColumnNullable*>(column); - // column num - uint32_t column_num = *reinterpret_cast<const uint32_t*>(buf); + // row num + uint32_t row_num = *reinterpret_cast<const uint32_t*>(buf); buf += sizeof(uint32_t); // null flags - col->get_null_map_data().resize(column_num); - memcpy(col->get_null_map_data().data(), buf, column_num * sizeof(bool)); - buf += column_num * sizeof(bool); + col->get_null_map_data().resize(row_num); + memcpy(col->get_null_map_data().data(), buf, row_num * sizeof(bool)); + buf += row_num * sizeof(bool); // data values IColumn& nested = col->get_nested_column(); return nested_data_type->deserialize(buf, &nested); diff --git a/be/src/vec/data_types/data_type_number_base.cpp b/be/src/vec/data_types/data_type_number_base.cpp index e828c83..d8aa5a3 100644 --- a/be/src/vec/data_types/data_type_number_base.cpp +++ b/be/src/vec/data_types/data_type_number_base.cpp @@ -76,7 +76,7 @@ std::string DataTypeNumberBase<T>::to_string(const IColumn& column, size_t row_n } } -// binary: column num | value1 | value2 | ... +// binary: row num | value1 | value2 | ... 
template <typename T> int64_t DataTypeNumberBase<T>::get_uncompressed_serialized_bytes(const IColumn& column) const { return sizeof(uint32_t) + column.size() * sizeof(FieldType); @@ -84,29 +84,29 @@ int64_t DataTypeNumberBase<T>::get_uncompressed_serialized_bytes(const IColumn& template <typename T> char* DataTypeNumberBase<T>::serialize(const IColumn& column, char* buf) const { - // column num - const auto column_num = column.size(); - *reinterpret_cast<uint32_t*>(buf) = column_num; + // row num + const auto row_num = column.size(); + *reinterpret_cast<uint32_t*>(buf) = row_num; buf += sizeof(uint32_t); // column data auto ptr = column.convert_to_full_column_if_const(); const auto* origin_data = assert_cast<const ColumnVector<T>&>(*ptr.get()).get_data().data(); - memcpy(buf, origin_data, column_num * sizeof(FieldType)); - buf += column_num * sizeof(FieldType); + memcpy(buf, origin_data, row_num * sizeof(FieldType)); + buf += row_num * sizeof(FieldType); return buf; } template <typename T> const char* DataTypeNumberBase<T>::deserialize(const char* buf, IColumn* column) const { - // column num - uint32_t column_num = *reinterpret_cast<const uint32_t*>(buf); + // row num + uint32_t row_num = *reinterpret_cast<const uint32_t*>(buf); buf += sizeof(uint32_t); // column data auto& container = assert_cast<ColumnVector<T>*>(column)->get_data(); - container.resize(column_num); - memcpy(container.data(), buf, column_num * sizeof(FieldType)); - buf += column_num * sizeof(FieldType); + container.resize(row_num); + memcpy(container.data(), buf, row_num * sizeof(FieldType)); + buf += row_num * sizeof(FieldType); return buf; } diff --git a/be/src/vec/data_types/data_type_string.cpp b/be/src/vec/data_types/data_type_string.cpp index 38e86ff..8a63f9b 100644 --- a/be/src/vec/data_types/data_type_string.cpp +++ b/be/src/vec/data_types/data_type_string.cpp @@ -79,7 +79,7 @@ bool DataTypeString::equals(const IDataType& rhs) const { } // binary: <size array> | total length | <value 
array> -// <size array> : column num | offset1 |offset2 | ... +// <size array> : row num | offset1 |offset2 | ... // <value array> : <value1> | <value2 | ... int64_t DataTypeString::get_uncompressed_serialized_bytes(const IColumn& column) const { auto ptr = column.convert_to_full_column_if_const(); @@ -91,7 +91,7 @@ char* DataTypeString::serialize(const IColumn& column, char* buf) const { auto ptr = column.convert_to_full_column_if_const(); const auto& data_column = assert_cast<const ColumnString&>(*ptr.get()); - // column num + // row num *reinterpret_cast<uint32_t*>(buf) = column.size(); buf += sizeof(uint32_t); // offsets @@ -113,13 +113,13 @@ const char* DataTypeString::deserialize(const char* buf, IColumn* column) const ColumnString::Chars& data = column_string->get_chars(); ColumnString::Offsets& offsets = column_string->get_offsets(); - // column num - uint32_t column_num = *reinterpret_cast<const uint32_t*>(buf); + // row num + uint32_t row_num = *reinterpret_cast<const uint32_t*>(buf); buf += sizeof(uint32_t); // offsets - offsets.resize(column_num); - memcpy(offsets.data(), buf, sizeof(uint32_t) * column_num); - buf += sizeof(uint32_t) * column_num; + offsets.resize(row_num); + memcpy(offsets.data(), buf, sizeof(uint32_t) * row_num); + buf += sizeof(uint32_t) * row_num; // total length uint64_t value_len = *reinterpret_cast<const uint64_t*>(buf); buf += sizeof(uint64_t); --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org