HappenLee commented on a change in pull request #7939: URL: https://github.com/apache/incubator-doris/pull/7939#discussion_r799195842
########## File path: be/src/vec/data_types/data_type_nullable.cpp ########## @@ -53,33 +53,52 @@ std::string DataTypeNullable::to_string(const IColumn& column, size_t row_num) c } } -size_t DataTypeNullable::serialize(const IColumn& column, PColumn* pcolumn) const { +// binary: column num | <null array> | <values array> +// <null array>: is_null1 | is_null2 | ... +// <values array>: value1 | value2 | ...> +int64_t DataTypeNullable::get_uncompressed_serialized_bytes(const IColumn& column) const { + int64_t size = sizeof(uint32_t); + size += sizeof(bool) * column.size(); + size += nested_data_type->get_uncompressed_serialized_bytes(assert_cast<const ColumnNullable&>(column).get_nested_column()); + return size; +} + +char* DataTypeNullable::serialize(const IColumn& column, char* buf) const { auto ptr = column.convert_to_full_column_if_const(); const ColumnNullable& col = assert_cast<const ColumnNullable&>(*ptr.get()); - pcolumn->mutable_is_null()->Reserve(column.size()); + // column num + *reinterpret_cast<uint32_t*>(buf) = column.size(); + buf += sizeof(uint32_t); + // null flags for (size_t i = 0; i < column.size(); ++i) { - bool is_null = col.is_null_at(i); - pcolumn->add_is_null(is_null); + *reinterpret_cast<bool*>(buf) = col.is_null_at(i); + buf += sizeof(bool); } - - return nested_data_type->serialize(col.get_nested_column(), pcolumn) + - sizeof(bool) * column.size(); + // data values + return nested_data_type->serialize(col.get_nested_column(), buf); } -void DataTypeNullable::deserialize(const PColumn& pcolumn, IColumn* column) const { +const char* DataTypeNullable::deserialize(const char* buf, IColumn* column) const { ColumnNullable* col = assert_cast<ColumnNullable*>(column); - col->get_null_map_data().reserve(pcolumn.is_null_size()); - - for (int i = 0; i < pcolumn.is_null_size(); ++i) { - if (pcolumn.is_null(i)) { - col->get_null_map_data().push_back(1); - } else { - col->get_null_map_data().push_back(0); - } + // column num + uint32_t column_num = 
*reinterpret_cast<const uint32_t*>(buf); + buf += sizeof(uint32_t); + // null flags + col->get_null_map_data().reserve(column_num); + for (int i = 0; i < column_num; ++i) { Review comment: Use memcpy to speed this up. ########## File path: be/src/vec/data_types/data_type_number_base.cpp ########## @@ -65,30 +65,40 @@ std::string DataTypeNumberBase<T>::to_string(const IColumn& column, size_t row_n } } +// binary: column num | value1 | value2 | ... template <typename T> -size_t DataTypeNumberBase<T>::serialize(const IColumn& column, PColumn* pcolumn) const { - const auto column_len = column.size(); - pcolumn->mutable_binary()->resize(column_len * sizeof(FieldType)); - auto* data = pcolumn->mutable_binary()->data(); +int64_t DataTypeNumberBase<T>::get_uncompressed_serialized_bytes(const IColumn& column) const { + return sizeof(uint32_t) + column.size() * sizeof(FieldType); +} - // copy the data +template <typename T> +char* DataTypeNumberBase<T>::serialize(const IColumn& column, char* buf) const { + // column num + const auto column_num = column.size(); Review comment: Why does each column need to serialize the column size? One block only needs to register block->rows() once. ########## File path: be/src/vec/data_types/data_type_nullable.cpp ########## @@ -53,33 +53,52 @@ std::string DataTypeNullable::to_string(const IColumn& column, size_t row_num) c } } -size_t DataTypeNullable::serialize(const IColumn& column, PColumn* pcolumn) const { +// binary: column num | <null array> | <values array> +// <null array>: is_null1 | is_null2 | ... 
+// <values array>: value1 | value2 | ...> +int64_t DataTypeNullable::get_uncompressed_serialized_bytes(const IColumn& column) const { + int64_t size = sizeof(uint32_t); + size += sizeof(bool) * column.size(); + size += nested_data_type->get_uncompressed_serialized_bytes(assert_cast<const ColumnNullable&>(column).get_nested_column()); + return size; +} + +char* DataTypeNullable::serialize(const IColumn& column, char* buf) const { auto ptr = column.convert_to_full_column_if_const(); const ColumnNullable& col = assert_cast<const ColumnNullable&>(*ptr.get()); - pcolumn->mutable_is_null()->Reserve(column.size()); + // column num + *reinterpret_cast<uint32_t*>(buf) = column.size(); + buf += sizeof(uint32_t); + // null flags for (size_t i = 0; i < column.size(); ++i) { - bool is_null = col.is_null_at(i); - pcolumn->add_is_null(is_null); + *reinterpret_cast<bool*>(buf) = col.is_null_at(i); Review comment: TODO: maybe memcpy should be used here to speed this up. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org