This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new ad44f2817f2 [opt](exec) change string serialization/deserialization from streamvbyte to LZ4 to improve performance (#30693)
ad44f2817f2 is described below

commit ad44f2817f26c1f12382f883aeb6119e1fe40ced
Author: HappenLee <[email protected]>
AuthorDate: Thu Feb 1 23:07:07 2024 +0800

    [opt](exec) change string serialization/deserialization from streamvbyte to LZ4 to improve performance (#30693)
---
 be/src/vec/data_types/data_type_string.cpp | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/be/src/vec/data_types/data_type_string.cpp 
b/be/src/vec/data_types/data_type_string.cpp
index 16440675b24..644bfac4c31 100644
--- a/be/src/vec/data_types/data_type_string.cpp
+++ b/be/src/vec/data_types/data_type_string.cpp
@@ -94,8 +94,7 @@ int64_t 
DataTypeString::get_uncompressed_serialized_bytes(const IColumn& column,
         if (auto bytes = data_column.get_chars().size(); bytes <= 
SERIALIZED_MEM_SIZE_LIMIT) {
             size += bytes;
         } else {
-            size += sizeof(size_t) +
-                    std::max(bytes, 
streamvbyte_max_compressedbytes(upper_int32(bytes)));
+            size += sizeof(size_t) + std::max(bytes, 
(size_t)LZ4_compressBound(bytes));
         }
         return size;
     } else {
@@ -142,9 +141,9 @@ char* DataTypeString::serialize(const IColumn& column, 
char* buf, int be_exec_ve
             buf += value_len;
             return buf;
         }
-        auto encode_size = streamvbyte_encode(
-                reinterpret_cast<const 
uint32_t*>(data_column.get_chars().data()),
-                upper_int32(value_len), (uint8_t*)(buf + sizeof(size_t)));
+        auto encode_size =
+                LZ4_compress_fast(data_column.get_chars().raw_data(), (buf + 
sizeof(size_t)),
+                                  value_len, LZ4_compressBound(value_len), 1);
         *reinterpret_cast<size_t*>(buf) = encode_size;
         buf += (sizeof(size_t) + encode_size);
         return buf;
@@ -195,15 +194,14 @@ const char* DataTypeString::deserialize(const char* buf, 
IColumn* column,
         buf += sizeof(uint64_t);
         data.resize(value_len);
 
-        // offsets
+        // values
         if (value_len <= SERIALIZED_MEM_SIZE_LIMIT) {
             memcpy(data.data(), buf, value_len);
             buf += value_len;
         } else {
             size_t encode_size = *reinterpret_cast<const size_t*>(buf);
             buf += sizeof(size_t);
-            streamvbyte_decode((const uint8_t*)buf, (uint32_t*)(data.data()),
-                               upper_int32(value_len));
+            LZ4_decompress_safe(buf, reinterpret_cast<char*>(data.data()), 
encode_size, value_len);
             buf += encode_size;
         }
         return buf;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to