This is an automated email from the ASF dual-hosted git repository.

panxiaolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 8be13b0e2db [Refactor](block) Refactor interface of shrink column 
(#44046)
8be13b0e2db is described below

commit 8be13b0e2db1c589414b8a4f046a4c102093b123
Author: zclllhhjj <zhaochan...@selectdb.com>
AuthorDate: Tue Nov 19 19:24:00 2024 +0800

    [Refactor](block) Refactor interface of shrink column (#44046)
    
    Refactor interface of shrink column
---
 be/src/vec/columns/column.h                | 13 ++------
 be/src/vec/columns/column_array.cpp        | 12 ++------
 be/src/vec/columns/column_array.h          |  3 +-
 be/src/vec/columns/column_map.cpp          | 24 ++-------------
 be/src/vec/columns/column_map.h            |  3 +-
 be/src/vec/columns/column_nullable.cpp     | 13 ++------
 be/src/vec/columns/column_nullable.h       |  4 +--
 be/src/vec/columns/column_object.h         |  6 ----
 be/src/vec/columns/column_string.cpp       | 31 ++++++++++++-------
 be/src/vec/columns/column_string.h         |  4 +--
 be/src/vec/columns/column_struct.cpp       | 24 ++-------------
 be/src/vec/columns/column_struct.h         |  3 +-
 be/src/vec/core/block.cpp                  |  2 +-
 be/test/vec/columns/column_string_test.cpp | 48 ++++++++++++++++++++++++++++++
 14 files changed, 90 insertions(+), 100 deletions(-)

diff --git a/be/src/vec/columns/column.h b/be/src/vec/columns/column.h
index d45757b329c..975827bd13e 100644
--- a/be/src/vec/columns/column.h
+++ b/be/src/vec/columns/column.h
@@ -126,16 +126,9 @@ public:
         return nullptr;
     }
 
-    // shrink the end zeros for CHAR type or ARRAY<CHAR> type
-    virtual MutablePtr get_shrinked_column() {
-        throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR,
-                               "Method get_shrinked_column is not supported 
for " + get_name());
-        return nullptr;
-    }
-
-    // check the column whether could shrinked
-    // now support only in char type, or the nested type in complex type: 
array{char}, struct{char}, map{char}
-    virtual bool could_shrinked_column() { return false; }
+    // shrink the end zeros for ColumnStr(also for who has it nested). so nest 
column will call it for all nested.
+    // for non-str col, will reach here(do nothing). only ColumnStr will 
really shrink itself.
+    virtual void shrink_padding_chars() {}
 
     /// Some columns may require finalization before using of other operations.
     virtual void finalize() {}
diff --git a/be/src/vec/columns/column_array.cpp 
b/be/src/vec/columns/column_array.cpp
index e66e016381e..bd4464e2caf 100644
--- a/be/src/vec/columns/column_array.cpp
+++ b/be/src/vec/columns/column_array.cpp
@@ -79,16 +79,8 @@ ColumnArray::ColumnArray(MutableColumnPtr&& nested_column) : 
data(std::move(nest
     offsets = ColumnOffsets::create();
 }
 
-bool ColumnArray::could_shrinked_column() {
-    return data->could_shrinked_column();
-}
-
-MutableColumnPtr ColumnArray::get_shrinked_column() {
-    if (could_shrinked_column()) {
-        return ColumnArray::create(data->get_shrinked_column(), 
offsets->assume_mutable());
-    } else {
-        return ColumnArray::create(data->assume_mutable(), 
offsets->assume_mutable());
-    }
+void ColumnArray::shrink_padding_chars() {
+    data->shrink_padding_chars();
 }
 
 std::string ColumnArray::get_name() const {
diff --git a/be/src/vec/columns/column_array.h 
b/be/src/vec/columns/column_array.h
index a3c79ffaa02..4dbc8e91e52 100644
--- a/be/src/vec/columns/column_array.h
+++ b/be/src/vec/columns/column_array.h
@@ -112,8 +112,7 @@ public:
         return Base::create(std::forward<Args>(args)...);
     }
 
-    MutableColumnPtr get_shrinked_column() override;
-    bool could_shrinked_column() override;
+    void shrink_padding_chars() override;
 
     /** On the index i there is an offset to the beginning of the i + 1 -th 
element. */
     using ColumnOffsets = ColumnVector<Offset64>;
diff --git a/be/src/vec/columns/column_map.cpp 
b/be/src/vec/columns/column_map.cpp
index 85964ca967b..eb3b431a229 100644
--- a/be/src/vec/columns/column_map.cpp
+++ b/be/src/vec/columns/column_map.cpp
@@ -502,27 +502,9 @@ ColumnPtr ColumnMap::replicate(const Offsets& offsets) 
const {
     return res;
 }
 
-bool ColumnMap::could_shrinked_column() {
-    return keys_column->could_shrinked_column() || 
values_column->could_shrinked_column();
-}
-
-MutableColumnPtr ColumnMap::get_shrinked_column() {
-    MutableColumns new_columns(2);
-
-    if (keys_column->could_shrinked_column()) {
-        new_columns[0] = keys_column->get_shrinked_column();
-    } else {
-        new_columns[0] = keys_column->get_ptr();
-    }
-
-    if (values_column->could_shrinked_column()) {
-        new_columns[1] = values_column->get_shrinked_column();
-    } else {
-        new_columns[1] = values_column->get_ptr();
-    }
-
-    return ColumnMap::create(new_columns[0]->assume_mutable(), 
new_columns[1]->assume_mutable(),
-                             offsets_column->assume_mutable());
+void ColumnMap::shrink_padding_chars() {
+    keys_column->shrink_padding_chars();
+    values_column->shrink_padding_chars();
 }
 
 void ColumnMap::reserve(size_t n) {
diff --git a/be/src/vec/columns/column_map.h b/be/src/vec/columns/column_map.h
index 08eee11c436..ae482a2d4e0 100644
--- a/be/src/vec/columns/column_map.h
+++ b/be/src/vec/columns/column_map.h
@@ -118,8 +118,7 @@ public:
     const char* deserialize_and_insert_from_arena(const char* pos) override;
 
     void update_hash_with_value(size_t n, SipHash& hash) const override;
-    MutableColumnPtr get_shrinked_column() override;
-    bool could_shrinked_column() override;
+    void shrink_padding_chars() override;
     ColumnPtr filter(const Filter& filt, ssize_t result_size_hint) const 
override;
     size_t filter(const Filter& filter) override;
     ColumnPtr permute(const Permutation& perm, size_t limit) const override;
diff --git a/be/src/vec/columns/column_nullable.cpp 
b/be/src/vec/columns/column_nullable.cpp
index 8fffe1dfe65..493aabdae31 100644
--- a/be/src/vec/columns/column_nullable.cpp
+++ b/be/src/vec/columns/column_nullable.cpp
@@ -51,17 +51,8 @@ ColumnNullable::ColumnNullable(MutableColumnPtr&& 
nested_column_, MutableColumnP
     _need_update_has_null = true;
 }
 
-bool ColumnNullable::could_shrinked_column() {
-    return get_nested_column_ptr()->could_shrinked_column();
-}
-
-MutableColumnPtr ColumnNullable::get_shrinked_column() {
-    if (could_shrinked_column()) {
-        return 
ColumnNullable::create(get_nested_column_ptr()->get_shrinked_column(),
-                                      get_null_map_column_ptr());
-    } else {
-        return ColumnNullable::create(get_nested_column_ptr(), 
get_null_map_column_ptr());
-    }
+void ColumnNullable::shrink_padding_chars() {
+    get_nested_column_ptr()->shrink_padding_chars();
 }
 
 void ColumnNullable::update_xxHash_with_value(size_t start, size_t end, 
uint64_t& hash,
diff --git a/be/src/vec/columns/column_nullable.h 
b/be/src/vec/columns/column_nullable.h
index 35787cd52d8..6096fd4b669 100644
--- a/be/src/vec/columns/column_nullable.h
+++ b/be/src/vec/columns/column_nullable.h
@@ -143,8 +143,8 @@ public:
         return Base::create(std::forward<Args>(args)...);
     }
 
-    MutableColumnPtr get_shrinked_column() override;
-    bool could_shrinked_column() override;
+    void shrink_padding_chars() override;
+
     bool is_variable_length() const override { return 
nested_column->is_variable_length(); }
 
     std::string get_name() const override { return "Nullable(" + 
nested_column->get_name() + ")"; }
diff --git a/be/src/vec/columns/column_object.h 
b/be/src/vec/columns/column_object.h
index d7dd0a88da8..244692e9ddd 100644
--- a/be/src/vec/columns/column_object.h
+++ b/be/src/vec/columns/column_object.h
@@ -446,12 +446,6 @@ public:
     void update_crc_with_value(size_t start, size_t end, uint32_t& hash,
                                const uint8_t* __restrict null_data) const 
override;
 
-    // Not implemented
-    MutableColumnPtr get_shrinked_column() override {
-        throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR,
-                               "get_shrinked_column" + get_name());
-    }
-
     Int64 get_int(size_t /*n*/) const override {
         throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, "get_int" + 
get_name());
     }
diff --git a/be/src/vec/columns/column_string.cpp 
b/be/src/vec/columns/column_string.cpp
index 6eb3e45b2e0..3caa194551b 100644
--- a/be/src/vec/columns/column_string.cpp
+++ b/be/src/vec/columns/column_string.cpp
@@ -22,6 +22,7 @@
 
 #include <algorithm>
 #include <boost/iterator/iterator_facade.hpp>
+#include <cstring>
 
 #include "util/memcpy_inlined.h"
 #include "util/simd/bits.h"
@@ -81,16 +82,26 @@ MutableColumnPtr ColumnStr<T>::clone_resized(size_t 
to_size) const {
 }
 
 template <typename T>
-MutableColumnPtr ColumnStr<T>::get_shrinked_column() {
-    auto shrinked_column = ColumnStr<T>::create();
-    shrinked_column->get_offsets().reserve(offsets.size());
-    shrinked_column->get_chars().reserve(chars.size());
-    for (int i = 0; i < size(); i++) {
-        StringRef str = get_data_at(i);
-        reinterpret_cast<ColumnStr<T>*>(shrinked_column.get())
-                ->insert_data(str.data, strnlen(str.data, str.size));
-    }
-    return shrinked_column;
+void ColumnStr<T>::shrink_padding_chars() {
+    if (size() == 0) {
+        return;
+    }
+    char* data = reinterpret_cast<char*>(chars.data());
+    auto* offset = offsets.data();
+    size_t size = offsets.size();
+
+    // deal the 0-th element. no need to move.
+    auto next_start = offset[0];
+    offset[0] = strnlen(data, size_at(0));
+    for (size_t i = 1; i < size; i++) {
+        // get the i-th length and whole move it to cover the last's trailing 
void
+        auto length = strnlen(data + next_start, offset[i] - next_start);
+        memmove(data + offset[i - 1], data + next_start, length);
+        // offset i will be changed. so save the old value for (i+1)-th to get 
its length.
+        next_start = offset[i];
+        offset[i] = offset[i - 1] + length;
+    }
+    chars.resize_fill(offsets.back()); // just call it to shrink memory here. 
no possible to expand.
 }
 
 // This method is only called by MutableBlock::merge_ignore_overflow
diff --git a/be/src/vec/columns/column_string.h 
b/be/src/vec/columns/column_string.h
index 0881c887288..a72a3ec5cdc 100644
--- a/be/src/vec/columns/column_string.h
+++ b/be/src/vec/columns/column_string.h
@@ -85,6 +85,7 @@ private:
     /// For convenience, every string ends with terminating zero byte. Note 
that strings could contain zero bytes in the middle.
     Chars chars;
 
+    // Start position of i-th element.
     size_t ALWAYS_INLINE offset_at(ssize_t i) const { return offsets[i - 1]; }
 
     /// Size of i-th element, including terminating zero.
@@ -117,8 +118,7 @@ public:
 
     MutableColumnPtr clone_resized(size_t to_size) const override;
 
-    MutableColumnPtr get_shrinked_column() override;
-    bool could_shrinked_column() override { return true; }
+    void shrink_padding_chars() override;
 
     Field operator[](size_t n) const override {
         assert(n < size());
diff --git a/be/src/vec/columns/column_struct.cpp 
b/be/src/vec/columns/column_struct.cpp
index ec0c5e6a687..3a238a09c0d 100644
--- a/be/src/vec/columns/column_struct.cpp
+++ b/be/src/vec/columns/column_struct.cpp
@@ -313,28 +313,10 @@ ColumnPtr ColumnStruct::replicate(const Offsets& offsets) 
const {
     return ColumnStruct::create(new_columns);
 }
 
-bool ColumnStruct::could_shrinked_column() {
-    const size_t tuple_size = columns.size();
-    for (size_t i = 0; i < tuple_size; ++i) {
-        if (columns[i]->could_shrinked_column()) {
-            return true;
-        }
-    }
-    return false;
-}
-
-MutableColumnPtr ColumnStruct::get_shrinked_column() {
-    const size_t tuple_size = columns.size();
-    MutableColumns new_columns(tuple_size);
-
-    for (size_t i = 0; i < tuple_size; ++i) {
-        if (columns[i]->could_shrinked_column()) {
-            new_columns[i] = columns[i]->get_shrinked_column();
-        } else {
-            new_columns[i] = columns[i]->get_ptr();
-        }
+void ColumnStruct::shrink_padding_chars() {
+    for (auto& column : columns) {
+        column->shrink_padding_chars();
     }
-    return ColumnStruct::create(std::move(new_columns));
 }
 
 void ColumnStruct::reserve(size_t n) {
diff --git a/be/src/vec/columns/column_struct.h 
b/be/src/vec/columns/column_struct.h
index 15836604792..e9f8014d9db 100644
--- a/be/src/vec/columns/column_struct.h
+++ b/be/src/vec/columns/column_struct.h
@@ -151,8 +151,7 @@ public:
 
     int compare_at(size_t n, size_t m, const IColumn& rhs_, int 
nan_direction_hint) const override;
 
-    MutableColumnPtr get_shrinked_column() override;
-    bool could_shrinked_column() override;
+    void shrink_padding_chars() override;
 
     void reserve(size_t n) override;
     void resize(size_t n) override;
diff --git a/be/src/vec/core/block.cpp b/be/src/vec/core/block.cpp
index 11075335fb1..b15b83cf77e 100644
--- a/be/src/vec/core/block.cpp
+++ b/be/src/vec/core/block.cpp
@@ -1215,7 +1215,7 @@ void Block::shrink_char_type_column_suffix_zero(const 
std::vector<size_t>& char_
     for (auto idx : char_type_idx) {
         if (idx < data.size()) {
             auto& col_and_name = this->get_by_position(idx);
-            col_and_name.column = 
col_and_name.column->assume_mutable()->get_shrinked_column();
+            col_and_name.column->assume_mutable()->shrink_padding_chars();
         }
     }
 }
diff --git a/be/test/vec/columns/column_string_test.cpp 
b/be/test/vec/columns/column_string_test.cpp
new file mode 100644
index 00000000000..07195afe1cf
--- /dev/null
+++ b/be/test/vec/columns/column_string_test.cpp
@@ -0,0 +1,48 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/columns/column_string.h"
+
+#include <gmock/gmock-more-matchers.h>
+#include <gtest/gtest.h>
+
+#include "vec/common/string_ref.h"
+#include "vec/core/types.h"
+
+using namespace doris;
+using namespace doris::vectorized;
+
+TEST(ColumnStringTest, shrink_padding_chars) {
+    ColumnString::MutablePtr col = ColumnString::create();
+    col->insert_data("123\0   ", 7);
+    col->insert_data("456\0xx", 6);
+    col->insert_data("78", 2);
+    col->shrink_padding_chars();
+
+    EXPECT_EQ(col->size(), 3);
+    EXPECT_EQ(col->get_data_at(0), StringRef("123"));
+    EXPECT_EQ(col->get_data_at(0).size, 3);
+    EXPECT_EQ(col->get_data_at(1), StringRef("456"));
+    EXPECT_EQ(col->get_data_at(1).size, 3);
+    EXPECT_EQ(col->get_data_at(2), StringRef("78"));
+    EXPECT_EQ(col->get_data_at(2).size, 2);
+
+    col->insert_data("xyz", 2); // only xy
+
+    EXPECT_EQ(col->size(), 4);
+    EXPECT_EQ(col->get_data_at(3), StringRef("xy"));
+}
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to