HappenLee commented on a change in pull request #7939:
URL: https://github.com/apache/incubator-doris/pull/7939#discussion_r799195842



##########
File path: be/src/vec/data_types/data_type_nullable.cpp
##########
@@ -53,33 +53,52 @@ std::string DataTypeNullable::to_string(const IColumn& 
column, size_t row_num) c
     }
 }
 
-size_t DataTypeNullable::serialize(const IColumn& column, PColumn* pcolumn) 
const {
+// binary: column num | <null array> | <values array>
+//  <null array>: is_null1 | is_null2 | ...
+//  <values array>: value1 | value2 | ...>
+int64_t DataTypeNullable::get_uncompressed_serialized_bytes(const IColumn& 
column) const {
+    int64_t size = sizeof(uint32_t);
+    size += sizeof(bool) * column.size();
+    size += 
nested_data_type->get_uncompressed_serialized_bytes(assert_cast<const 
ColumnNullable&>(column).get_nested_column());
+    return size;
+}
+
+char* DataTypeNullable::serialize(const IColumn& column, char* buf) const {
     auto ptr = column.convert_to_full_column_if_const();
     const ColumnNullable& col = assert_cast<const ColumnNullable&>(*ptr.get());
-    pcolumn->mutable_is_null()->Reserve(column.size());
 
+    // column num
+    *reinterpret_cast<uint32_t*>(buf) = column.size();
+    buf += sizeof(uint32_t);
+    // null flags
     for (size_t i = 0; i < column.size(); ++i) {
-        bool is_null = col.is_null_at(i);
-        pcolumn->add_is_null(is_null);
+        *reinterpret_cast<bool*>(buf) = col.is_null_at(i);
+        buf += sizeof(bool);
     }
-
-    return nested_data_type->serialize(col.get_nested_column(), pcolumn) +
-           sizeof(bool) * column.size();
+    // data values
+    return nested_data_type->serialize(col.get_nested_column(), buf);
 }
 
-void DataTypeNullable::deserialize(const PColumn& pcolumn, IColumn* column) 
const {
+const char* DataTypeNullable::deserialize(const char* buf, IColumn* column) 
const {
     ColumnNullable* col = assert_cast<ColumnNullable*>(column);
-    col->get_null_map_data().reserve(pcolumn.is_null_size());
-
-    for (int i = 0; i < pcolumn.is_null_size(); ++i) {
-        if (pcolumn.is_null(i)) {
-            col->get_null_map_data().push_back(1);
-        } else {
-            col->get_null_map_data().push_back(0);
-        }
+    // column num
+    uint32_t column_num = *reinterpret_cast<const uint32_t*>(buf);
+    buf += sizeof(uint32_t);
+    // null flags
+    col->get_null_map_data().reserve(column_num);
+    for (int i = 0; i < column_num; ++i) {

Review comment:
       Use memcpy here to speed this up.

##########
File path: be/src/vec/data_types/data_type_number_base.cpp
##########
@@ -65,30 +65,40 @@ std::string DataTypeNumberBase<T>::to_string(const IColumn& 
column, size_t row_n
     }
 }
 
+// binary: column num | value1 | value2 | ...
 template <typename T>
-size_t DataTypeNumberBase<T>::serialize(const IColumn& column, PColumn* 
pcolumn) const {
-    const auto column_len = column.size();
-    pcolumn->mutable_binary()->resize(column_len * sizeof(FieldType));
-    auto* data = pcolumn->mutable_binary()->data();
+int64_t DataTypeNumberBase<T>::get_uncompressed_serialized_bytes(const 
IColumn& column) const {
+    return sizeof(uint32_t) + column.size() * sizeof(FieldType);
+}
 
-    // copy the data
+template <typename T>
+char* DataTypeNumberBase<T>::serialize(const IColumn& column, char* buf) const 
{
+    // column num
+    const auto column_num = column.size();

Review comment:
       Why does each column need to serialize the column size? One block only needs to 
register block->rows() once.

##########
File path: be/src/vec/data_types/data_type_nullable.cpp
##########
@@ -53,33 +53,52 @@ std::string DataTypeNullable::to_string(const IColumn& 
column, size_t row_num) c
     }
 }
 
-size_t DataTypeNullable::serialize(const IColumn& column, PColumn* pcolumn) 
const {
+// binary: column num | <null array> | <values array>
+//  <null array>: is_null1 | is_null2 | ...
+//  <values array>: value1 | value2 | ...>
+int64_t DataTypeNullable::get_uncompressed_serialized_bytes(const IColumn& 
column) const {
+    int64_t size = sizeof(uint32_t);
+    size += sizeof(bool) * column.size();
+    size += 
nested_data_type->get_uncompressed_serialized_bytes(assert_cast<const 
ColumnNullable&>(column).get_nested_column());
+    return size;
+}
+
+char* DataTypeNullable::serialize(const IColumn& column, char* buf) const {
     auto ptr = column.convert_to_full_column_if_const();
     const ColumnNullable& col = assert_cast<const ColumnNullable&>(*ptr.get());
-    pcolumn->mutable_is_null()->Reserve(column.size());
 
+    // column num
+    *reinterpret_cast<uint32_t*>(buf) = column.size();
+    buf += sizeof(uint32_t);
+    // null flags
     for (size_t i = 0; i < column.size(); ++i) {
-        bool is_null = col.is_null_at(i);
-        pcolumn->add_is_null(is_null);
+        *reinterpret_cast<bool*>(buf) = col.is_null_at(i);

Review comment:
       TODO: maybe memcpy should be used here to speed this up.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to