github-actions[bot] commented on code in PR #17330:
URL: https://github.com/apache/doris/pull/17330#discussion_r1122660820


##########
be/src/vec/columns/column_map.cpp:
##########
@@ -135,71 +198,162 @@
     }
 }
 
+StringRef ColumnMap::serialize_value_into_arena(size_t n, Arena& arena, char 
const*& begin) const {
+    StringRef res(begin, 0);
+    auto keys_ref = keys_column->serialize_value_into_arena(n, arena, begin);
+    res.data = keys_ref.data - res.size;
+    res.size += keys_ref.size;
+    auto value_ref = values_column->serialize_value_into_arena(n, arena, 
begin);
+    res.data = value_ref.data - res.size;
+    res.size += value_ref.size;
+
+    return res;
+}
+
 const char* ColumnMap::deserialize_and_insert_from_arena(const char* pos) {
-    pos = keys->deserialize_and_insert_from_arena(pos);
-    pos = values->deserialize_and_insert_from_arena(pos);
+    pos = keys_column->deserialize_and_insert_from_arena(pos);
+    pos = values_column->deserialize_and_insert_from_arena(pos);
 
     return pos;
 }
 
 void ColumnMap::update_hash_with_value(size_t n, SipHash& hash) const {
-    keys->update_hash_with_value(n, hash);
-    values->update_hash_with_value(n, hash);
+    keys_column->update_hash_with_value(n, hash);
+    values_column->update_hash_with_value(n, hash);
 }
 
 void ColumnMap::insert_range_from(const IColumn& src, size_t start, size_t 
length) {
-    keys->insert_range_from(*assert_cast<const ColumnMap&>(src).keys, start, 
length);
-    values->insert_range_from(*assert_cast<const ColumnMap&>(src).values, 
start, length);
+    if (length == 0) return;
+
+    const ColumnMap& src_concrete = assert_cast<const ColumnMap&>(src);
+
+    if (start + length > src_concrete.get_offsets().size())

Review Comment:
   warning: statement should be inside braces 
[readability-braces-around-statements]
   
   ```suggestion
       if (start + length > src_concrete.get_offsets().size()) {
   ```
   
   be/src/vec/columns/column_map.cpp:233:
   ```diff
   -                    << ")]";
   +                    << ")]";
   + }
   ```
   



##########
be/src/vec/columns/column_map.cpp:
##########
@@ -25,50 +25,104 @@ namespace doris::vectorized {
 /** A column of map values.
   */
 std::string ColumnMap::get_name() const {
-    return "Map(" + keys->get_name() + ", " + values->get_name() + ")";
+    return "Map(" + keys_column->get_name() + ", " + values_column->get_name() 
+ ")";
 }
 
-ColumnMap::ColumnMap(MutableColumnPtr&& keys, MutableColumnPtr&& values)
-        : keys(std::move(keys)), values(std::move(values)) {
-    check_size();
-}
+ColumnMap::ColumnMap(MutableColumnPtr&& keys, MutableColumnPtr&& values, 
MutableColumnPtr&& offsets)
+        : keys_column(std::move(keys)),
+          values_column(std::move(values)),
+          offsets_column(std::move(offsets)) {
+    const COffsets* offsets_concrete = typeid_cast<const 
COffsets*>(offsets_column.get());
 
-ColumnArray::Offsets64& ColumnMap::get_offsets() const {
-    const ColumnArray& column_keys = assert_cast<const 
ColumnArray&>(get_keys());
-    // todo . did here check size ?
-    return const_cast<Offsets64&>(column_keys.get_offsets());
-}
+    if (!offsets_concrete) {
+        LOG(FATAL) << "offsets_column must be a ColumnUInt64";
+    }
+
+    if (!offsets_concrete->empty() && keys && values) {
+        auto last_offset = offsets_concrete->get_data().back();
 
-void ColumnMap::check_size() const {
-    const auto* key_array = typeid_cast<const ColumnArray*>(keys.get());
-    const auto* value_array = typeid_cast<const ColumnArray*>(values.get());
-    CHECK(key_array) << "ColumnMap keys can be created only from array";
-    CHECK(value_array) << "ColumnMap values can be created only from array";
-    CHECK_EQ(get_keys_ptr()->size(), get_values_ptr()->size());
+        /// This will also prevent possible overflow in offset.
+        if (keys_column->size() != last_offset) {
+            LOG(FATAL) << "offsets_column has data inconsistent with 
key_column";
+        }
+        if (values_column->size() != last_offset) {
+            LOG(FATAL) << "offsets_column has data inconsistent with 
value_column";
+        }
+    }
 }
 
 // todo. here to resize every row map
 MutableColumnPtr ColumnMap::clone_resized(size_t to_size) const {
-    auto res = ColumnMap::create(keys->clone_resized(to_size), 
values->clone_resized(to_size));
+    auto res = ColumnMap::create(get_keys().clone_empty(), 
get_values().clone_empty(),
+                                 COffsets::create());
+    if (to_size == 0) return res;

Review Comment:
   warning: statement should be inside braces 
[readability-braces-around-statements]
   
   ```suggestion
       if (to_size == 0) { return res;
   }
   ```
   



##########
be/src/vec/columns/column_map.cpp:
##########
@@ -135,71 +198,162 @@
     }
 }
 
+StringRef ColumnMap::serialize_value_into_arena(size_t n, Arena& arena, char 
const*& begin) const {
+    StringRef res(begin, 0);
+    auto keys_ref = keys_column->serialize_value_into_arena(n, arena, begin);
+    res.data = keys_ref.data - res.size;
+    res.size += keys_ref.size;
+    auto value_ref = values_column->serialize_value_into_arena(n, arena, 
begin);
+    res.data = value_ref.data - res.size;
+    res.size += value_ref.size;
+
+    return res;
+}
+
 const char* ColumnMap::deserialize_and_insert_from_arena(const char* pos) {
-    pos = keys->deserialize_and_insert_from_arena(pos);
-    pos = values->deserialize_and_insert_from_arena(pos);
+    pos = keys_column->deserialize_and_insert_from_arena(pos);
+    pos = values_column->deserialize_and_insert_from_arena(pos);
 
     return pos;
 }
 
 void ColumnMap::update_hash_with_value(size_t n, SipHash& hash) const {
-    keys->update_hash_with_value(n, hash);
-    values->update_hash_with_value(n, hash);
+    keys_column->update_hash_with_value(n, hash);
+    values_column->update_hash_with_value(n, hash);
 }
 
 void ColumnMap::insert_range_from(const IColumn& src, size_t start, size_t 
length) {
-    keys->insert_range_from(*assert_cast<const ColumnMap&>(src).keys, start, 
length);
-    values->insert_range_from(*assert_cast<const ColumnMap&>(src).values, 
start, length);
+    if (length == 0) return;

Review Comment:
   warning: statement should be inside braces 
[readability-braces-around-statements]
   
   ```suggestion
       if (length == 0) { return;
   }
   ```
   



##########
be/src/vec/columns/column_map.cpp:
##########
@@ -135,71 +198,162 @@
     }
 }
 
+StringRef ColumnMap::serialize_value_into_arena(size_t n, Arena& arena, char 
const*& begin) const {
+    StringRef res(begin, 0);
+    auto keys_ref = keys_column->serialize_value_into_arena(n, arena, begin);
+    res.data = keys_ref.data - res.size;
+    res.size += keys_ref.size;
+    auto value_ref = values_column->serialize_value_into_arena(n, arena, 
begin);
+    res.data = value_ref.data - res.size;
+    res.size += value_ref.size;
+
+    return res;
+}
+
 const char* ColumnMap::deserialize_and_insert_from_arena(const char* pos) {
-    pos = keys->deserialize_and_insert_from_arena(pos);
-    pos = values->deserialize_and_insert_from_arena(pos);
+    pos = keys_column->deserialize_and_insert_from_arena(pos);
+    pos = values_column->deserialize_and_insert_from_arena(pos);
 
     return pos;
 }
 
 void ColumnMap::update_hash_with_value(size_t n, SipHash& hash) const {
-    keys->update_hash_with_value(n, hash);
-    values->update_hash_with_value(n, hash);
+    keys_column->update_hash_with_value(n, hash);
+    values_column->update_hash_with_value(n, hash);
 }
 
 void ColumnMap::insert_range_from(const IColumn& src, size_t start, size_t 
length) {
-    keys->insert_range_from(*assert_cast<const ColumnMap&>(src).keys, start, 
length);
-    values->insert_range_from(*assert_cast<const ColumnMap&>(src).values, 
start, length);
+    if (length == 0) return;
+
+    const ColumnMap& src_concrete = assert_cast<const ColumnMap&>(src);
+
+    if (start + length > src_concrete.get_offsets().size())
+        LOG(FATAL) << "Parameter out of bound in ColumnMap::insert_range_from 
method. [start("
+                   << std::to_string(start) << ") + length(" << 
std::to_string(length)
+                   << ") > offsets.size(" << 
std::to_string(src_concrete.get_offsets().size())
+                   << ")]";
+
+    size_t nested_offset = src_concrete.offset_at(start);
+    size_t nested_length = src_concrete.get_offsets()[start + length - 1] - 
nested_offset;
+
+    keys_column->insert_range_from(src_concrete.get_keys(), nested_offset, 
nested_length);
+    values_column->insert_range_from(src_concrete.get_values(), nested_offset, 
nested_length);
+
+    auto& cur_offsets = get_offsets();
+    const auto& src_offsets = src_concrete.get_offsets();
+
+    if (start == 0 && cur_offsets.empty()) {
+        cur_offsets.assign(src_offsets.begin(), src_offsets.begin() + length);
+    } else {
+        size_t old_size = cur_offsets.size();
+        // -1 is ok, because PaddedPODArray pads zeros on the left.
+        size_t prev_max_offset = cur_offsets.back();
+        cur_offsets.resize(old_size + length);
+
+        for (size_t i = 0; i < length; ++i)
+            cur_offsets[old_size + i] = src_offsets[start + i] - nested_offset 
+ prev_max_offset;

Review Comment:
   warning: statement should be inside braces 
[readability-braces-around-statements]
   
   ```suggestion
           for (size_t i = 0; i < length; ++i) {
               cur_offsets[old_size + i] = src_offsets[start + i] - 
nested_offset + prev_max_offset;
   }
   ```
   



##########
be/src/vec/columns/column_map.cpp:
##########
@@ -25,50 +25,104 @@
 /** A column of map values.
   */
 std::string ColumnMap::get_name() const {
-    return "Map(" + keys->get_name() + ", " + values->get_name() + ")";
+    return "Map(" + keys_column->get_name() + ", " + values_column->get_name() 
+ ")";
 }
 
-ColumnMap::ColumnMap(MutableColumnPtr&& keys, MutableColumnPtr&& values)
-        : keys(std::move(keys)), values(std::move(values)) {
-    check_size();
-}
+ColumnMap::ColumnMap(MutableColumnPtr&& keys, MutableColumnPtr&& values, 
MutableColumnPtr&& offsets)
+        : keys_column(std::move(keys)),
+          values_column(std::move(values)),
+          offsets_column(std::move(offsets)) {
+    const COffsets* offsets_concrete = typeid_cast<const 
COffsets*>(offsets_column.get());
 
-ColumnArray::Offsets64& ColumnMap::get_offsets() const {
-    const ColumnArray& column_keys = assert_cast<const 
ColumnArray&>(get_keys());
-    // todo . did here check size ?
-    return const_cast<Offsets64&>(column_keys.get_offsets());
-}
+    if (!offsets_concrete) {
+        LOG(FATAL) << "offsets_column must be a ColumnUInt64";
+    }
+
+    if (!offsets_concrete->empty() && keys && values) {
+        auto last_offset = offsets_concrete->get_data().back();
 
-void ColumnMap::check_size() const {
-    const auto* key_array = typeid_cast<const ColumnArray*>(keys.get());
-    const auto* value_array = typeid_cast<const ColumnArray*>(values.get());
-    CHECK(key_array) << "ColumnMap keys can be created only from array";
-    CHECK(value_array) << "ColumnMap values can be created only from array";
-    CHECK_EQ(get_keys_ptr()->size(), get_values_ptr()->size());
+        /// This will also prevent possible overflow in offset.
+        if (keys_column->size() != last_offset) {
+            LOG(FATAL) << "offsets_column has data inconsistent with 
key_column";
+        }
+        if (values_column->size() != last_offset) {
+            LOG(FATAL) << "offsets_column has data inconsistent with 
value_column";
+        }
+    }
 }
 
 // todo. here to resize every row map
 MutableColumnPtr ColumnMap::clone_resized(size_t to_size) const {
-    auto res = ColumnMap::create(keys->clone_resized(to_size), 
values->clone_resized(to_size));
+    auto res = ColumnMap::create(get_keys().clone_empty(), 
get_values().clone_empty(),
+                                 COffsets::create());
+    if (to_size == 0) return res;
+
+    size_t from_size = size();
+
+    if (to_size <= from_size) {
+        res->get_offsets().assign(get_offsets().begin(), get_offsets().begin() 
+ to_size);
+        res->get_keys().insert_range_from(get_keys(), 0, get_offsets()[to_size 
- 1]);
+        res->get_values().insert_range_from(get_values(), 0, 
get_offsets()[to_size - 1]);
+    } else {
+        /// Copy column and append empty arrays for extra elements.
+        Offset64 offset = 0;
+        if (from_size > 0) {
+            res->get_offsets().assign(get_offsets().begin(), 
get_offsets().end());
+            res->get_keys().insert_range_from(get_keys(), 0, 
get_keys().size());
+            res->get_values().insert_range_from(get_values(), 0, 
get_values().size());
+            offset = get_offsets().back();
+        }
+        res->get_offsets().resize(to_size);
+        for (size_t i = from_size; i < to_size; ++i) res->get_offsets()[i] = 
offset;

Review Comment:
   warning: statement should be inside braces 
[readability-braces-around-statements]
   
   ```suggestion
           for (size_t i = from_size; i < to_size; ++i) { res->get_offsets()[i] 
= offset;
   }
   ```
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to