github-actions[bot] commented on code in PR #17330:
URL: https://github.com/apache/doris/pull/17330#discussion_r1122660820
##########
be/src/vec/columns/column_map.cpp:
##########
@@ -135,71 +198,162 @@
}
}
+StringRef ColumnMap::serialize_value_into_arena(size_t n, Arena& arena, char
const*& begin) const {
+ StringRef res(begin, 0);
+ auto keys_ref = keys_column->serialize_value_into_arena(n, arena, begin);
+ res.data = keys_ref.data - res.size;
+ res.size += keys_ref.size;
+ auto value_ref = values_column->serialize_value_into_arena(n, arena,
begin);
+ res.data = value_ref.data - res.size;
+ res.size += value_ref.size;
+
+ return res;
+}
+
const char* ColumnMap::deserialize_and_insert_from_arena(const char* pos) {
- pos = keys->deserialize_and_insert_from_arena(pos);
- pos = values->deserialize_and_insert_from_arena(pos);
+ pos = keys_column->deserialize_and_insert_from_arena(pos);
+ pos = values_column->deserialize_and_insert_from_arena(pos);
return pos;
}
void ColumnMap::update_hash_with_value(size_t n, SipHash& hash) const {
- keys->update_hash_with_value(n, hash);
- values->update_hash_with_value(n, hash);
+ keys_column->update_hash_with_value(n, hash);
+ values_column->update_hash_with_value(n, hash);
}
void ColumnMap::insert_range_from(const IColumn& src, size_t start, size_t
length) {
- keys->insert_range_from(*assert_cast<const ColumnMap&>(src).keys, start,
length);
- values->insert_range_from(*assert_cast<const ColumnMap&>(src).values,
start, length);
+ if (length == 0) return;
+
+ const ColumnMap& src_concrete = assert_cast<const ColumnMap&>(src);
+
+ if (start + length > src_concrete.get_offsets().size())
Review Comment:
warning: statement should be inside braces
[readability-braces-around-statements]
```suggestion
if (start + length > src_concrete.get_offsets().size()) {
```
be/src/vec/columns/column_map.cpp:233:
```diff
- << ")]";
+ << ")]";
+ }
```
##########
be/src/vec/columns/column_map.cpp:
##########
@@ -25,50 +25,104 @@ namespace doris::vectorized {
/** A column of map values.
*/
std::string ColumnMap::get_name() const {
- return "Map(" + keys->get_name() + ", " + values->get_name() + ")";
+ return "Map(" + keys_column->get_name() + ", " + values_column->get_name()
+ ")";
}
-ColumnMap::ColumnMap(MutableColumnPtr&& keys, MutableColumnPtr&& values)
- : keys(std::move(keys)), values(std::move(values)) {
- check_size();
-}
+ColumnMap::ColumnMap(MutableColumnPtr&& keys, MutableColumnPtr&& values,
MutableColumnPtr&& offsets)
+ : keys_column(std::move(keys)),
+ values_column(std::move(values)),
+ offsets_column(std::move(offsets)) {
+ const COffsets* offsets_concrete = typeid_cast<const
COffsets*>(offsets_column.get());
-ColumnArray::Offsets64& ColumnMap::get_offsets() const {
- const ColumnArray& column_keys = assert_cast<const
ColumnArray&>(get_keys());
- // todo . did here check size ?
- return const_cast<Offsets64&>(column_keys.get_offsets());
-}
+ if (!offsets_concrete) {
+ LOG(FATAL) << "offsets_column must be a ColumnUInt64";
+ }
+
+ if (!offsets_concrete->empty() && keys && values) {
+ auto last_offset = offsets_concrete->get_data().back();
-void ColumnMap::check_size() const {
- const auto* key_array = typeid_cast<const ColumnArray*>(keys.get());
- const auto* value_array = typeid_cast<const ColumnArray*>(values.get());
- CHECK(key_array) << "ColumnMap keys can be created only from array";
- CHECK(value_array) << "ColumnMap values can be created only from array";
- CHECK_EQ(get_keys_ptr()->size(), get_values_ptr()->size());
+ /// This will also prevent possible overflow in offset.
+ if (keys_column->size() != last_offset) {
+ LOG(FATAL) << "offsets_column has data inconsistent with
key_column";
+ }
+ if (values_column->size() != last_offset) {
+ LOG(FATAL) << "offsets_column has data inconsistent with
value_column";
+ }
+ }
}
// todo. here to resize every row map
MutableColumnPtr ColumnMap::clone_resized(size_t to_size) const {
- auto res = ColumnMap::create(keys->clone_resized(to_size),
values->clone_resized(to_size));
+ auto res = ColumnMap::create(get_keys().clone_empty(),
get_values().clone_empty(),
+ COffsets::create());
+ if (to_size == 0) return res;
Review Comment:
warning: statement should be inside braces
[readability-braces-around-statements]
```suggestion
if (to_size == 0) { return res;
}
```
##########
be/src/vec/columns/column_map.cpp:
##########
@@ -135,71 +198,162 @@
}
}
+StringRef ColumnMap::serialize_value_into_arena(size_t n, Arena& arena, char
const*& begin) const {
+ StringRef res(begin, 0);
+ auto keys_ref = keys_column->serialize_value_into_arena(n, arena, begin);
+ res.data = keys_ref.data - res.size;
+ res.size += keys_ref.size;
+ auto value_ref = values_column->serialize_value_into_arena(n, arena,
begin);
+ res.data = value_ref.data - res.size;
+ res.size += value_ref.size;
+
+ return res;
+}
+
const char* ColumnMap::deserialize_and_insert_from_arena(const char* pos) {
- pos = keys->deserialize_and_insert_from_arena(pos);
- pos = values->deserialize_and_insert_from_arena(pos);
+ pos = keys_column->deserialize_and_insert_from_arena(pos);
+ pos = values_column->deserialize_and_insert_from_arena(pos);
return pos;
}
void ColumnMap::update_hash_with_value(size_t n, SipHash& hash) const {
- keys->update_hash_with_value(n, hash);
- values->update_hash_with_value(n, hash);
+ keys_column->update_hash_with_value(n, hash);
+ values_column->update_hash_with_value(n, hash);
}
void ColumnMap::insert_range_from(const IColumn& src, size_t start, size_t
length) {
- keys->insert_range_from(*assert_cast<const ColumnMap&>(src).keys, start,
length);
- values->insert_range_from(*assert_cast<const ColumnMap&>(src).values,
start, length);
+ if (length == 0) return;
Review Comment:
warning: statement should be inside braces
[readability-braces-around-statements]
```suggestion
if (length == 0) { return;
}
```
##########
be/src/vec/columns/column_map.cpp:
##########
@@ -135,71 +198,162 @@
}
}
+StringRef ColumnMap::serialize_value_into_arena(size_t n, Arena& arena, char
const*& begin) const {
+ StringRef res(begin, 0);
+ auto keys_ref = keys_column->serialize_value_into_arena(n, arena, begin);
+ res.data = keys_ref.data - res.size;
+ res.size += keys_ref.size;
+ auto value_ref = values_column->serialize_value_into_arena(n, arena,
begin);
+ res.data = value_ref.data - res.size;
+ res.size += value_ref.size;
+
+ return res;
+}
+
const char* ColumnMap::deserialize_and_insert_from_arena(const char* pos) {
- pos = keys->deserialize_and_insert_from_arena(pos);
- pos = values->deserialize_and_insert_from_arena(pos);
+ pos = keys_column->deserialize_and_insert_from_arena(pos);
+ pos = values_column->deserialize_and_insert_from_arena(pos);
return pos;
}
void ColumnMap::update_hash_with_value(size_t n, SipHash& hash) const {
- keys->update_hash_with_value(n, hash);
- values->update_hash_with_value(n, hash);
+ keys_column->update_hash_with_value(n, hash);
+ values_column->update_hash_with_value(n, hash);
}
void ColumnMap::insert_range_from(const IColumn& src, size_t start, size_t
length) {
- keys->insert_range_from(*assert_cast<const ColumnMap&>(src).keys, start,
length);
- values->insert_range_from(*assert_cast<const ColumnMap&>(src).values,
start, length);
+ if (length == 0) return;
+
+ const ColumnMap& src_concrete = assert_cast<const ColumnMap&>(src);
+
+ if (start + length > src_concrete.get_offsets().size())
+ LOG(FATAL) << "Parameter out of bound in ColumnMap::insert_range_from
method. [start("
+ << std::to_string(start) << ") + length(" <<
std::to_string(length)
+ << ") > offsets.size(" <<
std::to_string(src_concrete.get_offsets().size())
+ << ")]";
+
+ size_t nested_offset = src_concrete.offset_at(start);
+ size_t nested_length = src_concrete.get_offsets()[start + length - 1] -
nested_offset;
+
+ keys_column->insert_range_from(src_concrete.get_keys(), nested_offset,
nested_length);
+ values_column->insert_range_from(src_concrete.get_values(), nested_offset,
nested_length);
+
+ auto& cur_offsets = get_offsets();
+ const auto& src_offsets = src_concrete.get_offsets();
+
+ if (start == 0 && cur_offsets.empty()) {
+ cur_offsets.assign(src_offsets.begin(), src_offsets.begin() + length);
+ } else {
+ size_t old_size = cur_offsets.size();
+ // -1 is ok, because PaddedPODArray pads zeros on the left.
+ size_t prev_max_offset = cur_offsets.back();
+ cur_offsets.resize(old_size + length);
+
+ for (size_t i = 0; i < length; ++i)
+ cur_offsets[old_size + i] = src_offsets[start + i] - nested_offset
+ prev_max_offset;
Review Comment:
warning: statement should be inside braces
[readability-braces-around-statements]
```suggestion
for (size_t i = 0; i < length; ++i) {
cur_offsets[old_size + i] = src_offsets[start + i] -
nested_offset + prev_max_offset;
}
```
##########
be/src/vec/columns/column_map.cpp:
##########
@@ -25,50 +25,104 @@
/** A column of map values.
*/
std::string ColumnMap::get_name() const {
- return "Map(" + keys->get_name() + ", " + values->get_name() + ")";
+ return "Map(" + keys_column->get_name() + ", " + values_column->get_name()
+ ")";
}
-ColumnMap::ColumnMap(MutableColumnPtr&& keys, MutableColumnPtr&& values)
- : keys(std::move(keys)), values(std::move(values)) {
- check_size();
-}
+ColumnMap::ColumnMap(MutableColumnPtr&& keys, MutableColumnPtr&& values,
MutableColumnPtr&& offsets)
+ : keys_column(std::move(keys)),
+ values_column(std::move(values)),
+ offsets_column(std::move(offsets)) {
+ const COffsets* offsets_concrete = typeid_cast<const
COffsets*>(offsets_column.get());
-ColumnArray::Offsets64& ColumnMap::get_offsets() const {
- const ColumnArray& column_keys = assert_cast<const
ColumnArray&>(get_keys());
- // todo . did here check size ?
- return const_cast<Offsets64&>(column_keys.get_offsets());
-}
+ if (!offsets_concrete) {
+ LOG(FATAL) << "offsets_column must be a ColumnUInt64";
+ }
+
+ if (!offsets_concrete->empty() && keys && values) {
+ auto last_offset = offsets_concrete->get_data().back();
-void ColumnMap::check_size() const {
- const auto* key_array = typeid_cast<const ColumnArray*>(keys.get());
- const auto* value_array = typeid_cast<const ColumnArray*>(values.get());
- CHECK(key_array) << "ColumnMap keys can be created only from array";
- CHECK(value_array) << "ColumnMap values can be created only from array";
- CHECK_EQ(get_keys_ptr()->size(), get_values_ptr()->size());
+ /// This will also prevent possible overflow in offset.
+ if (keys_column->size() != last_offset) {
+ LOG(FATAL) << "offsets_column has data inconsistent with
key_column";
+ }
+ if (values_column->size() != last_offset) {
+ LOG(FATAL) << "offsets_column has data inconsistent with
value_column";
+ }
+ }
}
// todo. here to resize every row map
MutableColumnPtr ColumnMap::clone_resized(size_t to_size) const {
- auto res = ColumnMap::create(keys->clone_resized(to_size),
values->clone_resized(to_size));
+ auto res = ColumnMap::create(get_keys().clone_empty(),
get_values().clone_empty(),
+ COffsets::create());
+ if (to_size == 0) return res;
+
+ size_t from_size = size();
+
+ if (to_size <= from_size) {
+ res->get_offsets().assign(get_offsets().begin(), get_offsets().begin()
+ to_size);
+ res->get_keys().insert_range_from(get_keys(), 0, get_offsets()[to_size
- 1]);
+ res->get_values().insert_range_from(get_values(), 0,
get_offsets()[to_size - 1]);
+ } else {
+ /// Copy column and append empty arrays for extra elements.
+ Offset64 offset = 0;
+ if (from_size > 0) {
+ res->get_offsets().assign(get_offsets().begin(),
get_offsets().end());
+ res->get_keys().insert_range_from(get_keys(), 0,
get_keys().size());
+ res->get_values().insert_range_from(get_values(), 0,
get_values().size());
+ offset = get_offsets().back();
+ }
+ res->get_offsets().resize(to_size);
+ for (size_t i = from_size; i < to_size; ++i) res->get_offsets()[i] =
offset;
Review Comment:
warning: statement should be inside braces
[readability-braces-around-statements]
```suggestion
for (size_t i = from_size; i < to_size; ++i) { res->get_offsets()[i]
= offset;
}
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]