This is an automated email from the ASF dual-hosted git repository.
gabriellee pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 8a8d37c271c [refine](column) ColumnArray does not implement the
insert data function. (#43926)
8a8d37c271c is described below
commit 8a8d37c271c9ba2031ee6dcd024940fb669c04ef
Author: Mryange <[email protected]>
AuthorDate: Mon Nov 25 15:34:47 2024 +0800
[refine](column) ColumnArray does not implement the insert data function.
(#43926)
ColumnArray cannot determine how many elements it contains from a single
block of memory.
With the original approach, the result of get_data_at() could not be passed
back into insert_data(). Therefore, these functions are no longer implemented
for ColumnArray and throw NOT_IMPLEMENTED_ERROR instead.
---
be/src/vec/columns/column.h | 11 ------
be/src/vec/columns/column_array.cpp | 52 ++++-----------------------
be/src/vec/columns/column_complex.h | 3 --
be/src/vec/columns/column_const.cpp | 33 -----------------
be/src/vec/columns/column_const.h | 10 ------
be/src/vec/columns/column_decimal.h | 2 --
be/src/vec/columns/column_dictionary.h | 4 ---
be/src/vec/columns/column_nullable.h | 6 ----
be/src/vec/columns/column_object.h | 5 ---
be/src/vec/columns/column_vector.h | 2 --
be/src/vec/columns/predicate_column.h | 3 --
be/test/vec/columns/column_hash_func_test.cpp | 6 ++--
12 files changed, 9 insertions(+), 128 deletions(-)
diff --git a/be/src/vec/columns/column.h b/be/src/vec/columns/column.h
index 19462b182bd..96408579a84 100644
--- a/be/src/vec/columns/column.h
+++ b/be/src/vec/columns/column.h
@@ -601,23 +601,12 @@ public:
* To avoid confusion between these cases, we don't have isContiguous
method.
*/
- /// Values in column are represented as continuous memory segment of fixed
size. Implies values_have_fixed_size.
- virtual bool is_fixed_and_contiguous() const { return false; }
-
- /// If is_fixed_and_contiguous, returns the underlying data array,
otherwise throws an exception.
virtual StringRef get_raw_data() const {
throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR,
"Column {} is not a contiguous block of
memory", get_name());
return StringRef {};
}
- /// If values_have_fixed_size, returns size of value, otherwise throw an
exception.
- virtual size_t size_of_value_if_fixed() const {
- throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR,
- "Values of column {} are not fixed size.",
get_name());
- return 0;
- }
-
/// Returns ratio of values in column, that are equal to default value of
column.
/// Checks only @sample_ratio ratio of rows.
virtual double get_ratio_of_default_rows(double sample_ratio = 1.0) const
{ return 0.0; }
diff --git a/be/src/vec/columns/column_array.cpp
b/be/src/vec/columns/column_array.cpp
index bd4464e2caf..0c5a53cdb24 100644
--- a/be/src/vec/columns/column_array.cpp
+++ b/be/src/vec/columns/column_array.cpp
@@ -151,26 +151,13 @@ void ColumnArray::get(size_t n, Field& res) const {
}
StringRef ColumnArray::get_data_at(size_t n) const {
- /** Returns the range of memory that covers all elements of the array.
- * Works for arrays of fixed length values.
- * For arrays of strings and arrays of arrays, the resulting chunk of
memory may not be one-to-one correspondence with the elements,
- * since it contains only the data laid in succession, but not the
offsets.
- */
- size_t offset_of_first_elem = offset_at(n);
- StringRef first;
- if (offset_of_first_elem < get_data().size()) {
- first = get_data().get_data_at(offset_of_first_elem);
- }
-
- size_t array_size = size_at(n);
- if (array_size == 0) {
- return StringRef(first.data, 0);
- }
-
- size_t offset_of_last_elem = offset_at(n + 1) - 1;
- StringRef last = get_data().get_data_at(offset_of_last_elem);
+ throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR,
+ "Method get_data_at is not supported for " +
get_name());
+}
- return StringRef(first.data, last.data + last.size - first.data);
+void ColumnArray::insert_data(const char* pos, size_t length) {
+ throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR,
+ "Method insert_data is not supported for " +
get_name());
}
bool ColumnArray::is_default_at(size_t n) const {
@@ -178,33 +165,6 @@ bool ColumnArray::is_default_at(size_t n) const {
return offsets_data[n] == offsets_data[static_cast<ssize_t>(n) - 1];
}
-void ColumnArray::insert_data(const char* pos, size_t length) {
- /** Similarly - only for arrays of fixed length values.
- */
- if (!data->is_fixed_and_contiguous()) {
- throw doris::Exception(ErrorCode::INTERNAL_ERROR,
- "Method insert_data should have_fixed_size, {}
is not suitable",
- get_name());
- }
-
- size_t field_size = data->size_of_value_if_fixed();
-
- size_t elems = 0;
-
- if (length) {
- const char* end = pos + length;
- for (; pos + field_size <= end; pos += field_size, ++elems)
- data->insert_data(pos, field_size);
-
- if (pos != end)
- throw doris::Exception(ErrorCode::INTERNAL_ERROR,
- "Incorrect length argument for method
ColumnArray::insert_data");
- __builtin_unreachable();
- }
-
- get_offsets().push_back(get_offsets().back() + elems);
-}
-
StringRef ColumnArray::serialize_value_into_arena(size_t n, Arena& arena,
char const*& begin) const {
size_t array_size = size_at(n);
diff --git a/be/src/vec/columns/column_complex.h
b/be/src/vec/columns/column_complex.h
index 24b6b7ddbd7..14ae940c9d7 100644
--- a/be/src/vec/columns/column_complex.h
+++ b/be/src/vec/columns/column_complex.h
@@ -207,9 +207,6 @@ public:
// TODO add hash function
}
- bool is_fixed_and_contiguous() const override { return true; }
- size_t size_of_value_if_fixed() const override { return sizeof(T); }
-
StringRef get_raw_data() const override {
return StringRef(reinterpret_cast<const char*>(data.data()),
data.size());
}
diff --git a/be/src/vec/columns/column_const.cpp
b/be/src/vec/columns/column_const.cpp
index a4b3127ad6c..f751f1d8d3e 100644
--- a/be/src/vec/columns/column_const.cpp
+++ b/be/src/vec/columns/column_const.cpp
@@ -110,39 +110,6 @@ ColumnPtr ColumnConst::permute(const Permutation& perm,
size_t limit) const {
return ColumnConst::create(data, limit);
}
-void ColumnConst::update_crcs_with_value(uint32_t* __restrict hashes,
doris::PrimitiveType type,
- uint32_t rows, uint32_t offset,
- const uint8_t* __restrict null_data)
const {
- DCHECK(null_data == nullptr);
- DCHECK(rows == size());
- auto real_data = data->get_data_at(0);
- if (real_data.data == nullptr) {
- for (int i = 0; i < rows; ++i) {
- hashes[i] = HashUtil::zlib_crc_hash_null(hashes[i]);
- }
- } else {
- for (int i = 0; i < rows; ++i) {
- hashes[i] = RawValue::zlib_crc32(real_data.data, real_data.size,
type, hashes[i]);
- }
- }
-}
-
-void ColumnConst::update_hashes_with_value(uint64_t* __restrict hashes,
- const uint8_t* __restrict
null_data) const {
- DCHECK(null_data == nullptr);
- auto real_data = data->get_data_at(0);
- auto real_size = size();
- if (real_data.data == nullptr) {
- for (int i = 0; i < real_size; ++i) {
- hashes[i] = HashUtil::xxHash64NullWithSeed(hashes[i]);
- }
- } else {
- for (int i = 0; i < real_size; ++i) {
- hashes[i] = HashUtil::xxHash64WithSeed(real_data.data,
real_data.size, hashes[i]);
- }
- }
-}
-
void ColumnConst::get_permutation(bool /*reverse*/, size_t /*limit*/, int
/*nan_direction_hint*/,
Permutation& res) const {
res.resize(s);
diff --git a/be/src/vec/columns/column_const.h
b/be/src/vec/columns/column_const.h
index 980d9d64148..ee3860f0635 100644
--- a/be/src/vec/columns/column_const.h
+++ b/be/src/vec/columns/column_const.h
@@ -208,14 +208,6 @@ public:
data->update_hash_with_value(0, hash);
}
- // (TODO.Amory) here may not use column_const update hash, and
PrimitiveType is not used.
- void update_crcs_with_value(uint32_t* __restrict hashes, PrimitiveType
type, uint32_t rows,
- uint32_t offset = 0,
- const uint8_t* __restrict null_data = nullptr)
const override;
-
- void update_hashes_with_value(uint64_t* __restrict hashes,
- const uint8_t* __restrict null_data) const
override;
-
ColumnPtr filter(const Filter& filt, ssize_t result_size_hint) const
override;
size_t filter(const Filter& filter) override;
@@ -263,8 +255,6 @@ public:
bool is_concrete_nullable() const override { return
is_column_nullable(*data); }
bool only_null() const override { return data->is_null_at(0); }
bool is_numeric() const override { return data->is_numeric(); }
- bool is_fixed_and_contiguous() const override { return
data->is_fixed_and_contiguous(); }
- size_t size_of_value_if_fixed() const override { return
data->size_of_value_if_fixed(); }
StringRef get_raw_data() const override { return data->get_raw_data(); }
/// Not part of the common interface.
diff --git a/be/src/vec/columns/column_decimal.h
b/be/src/vec/columns/column_decimal.h
index d754831cc56..4c2f69d5ef3 100644
--- a/be/src/vec/columns/column_decimal.h
+++ b/be/src/vec/columns/column_decimal.h
@@ -106,8 +106,6 @@ public:
bool is_numeric() const override { return false; }
bool is_column_decimal() const override { return true; }
- bool is_fixed_and_contiguous() const override { return true; }
- size_t size_of_value_if_fixed() const override { return sizeof(T); }
size_t size() const override { return data.size(); }
size_t byte_size() const override { return data.size() * sizeof(data[0]); }
diff --git a/be/src/vec/columns/column_dictionary.h
b/be/src/vec/columns/column_dictionary.h
index 69e04973af7..ae7d001a31d 100644
--- a/be/src/vec/columns/column_dictionary.h
+++ b/be/src/vec/columns/column_dictionary.h
@@ -158,10 +158,6 @@ public:
__builtin_unreachable();
}
- bool is_fixed_and_contiguous() const override { return true; }
-
- size_t size_of_value_if_fixed() const override { return sizeof(T); }
-
[[noreturn]] StringRef get_raw_data() const override {
throw doris::Exception(ErrorCode::INTERNAL_ERROR,
"get_raw_data not supported in
ColumnDictionary");
diff --git a/be/src/vec/columns/column_nullable.h
b/be/src/vec/columns/column_nullable.h
index 2b87aa982ca..252144fbc5f 100644
--- a/be/src/vec/columns/column_nullable.h
+++ b/be/src/vec/columns/column_nullable.h
@@ -334,18 +334,12 @@ public:
bool is_column_array() const override { return
get_nested_column().is_column_array(); }
bool is_column_map() const override { return
get_nested_column().is_column_map(); }
bool is_column_struct() const override { return
get_nested_column().is_column_struct(); }
- bool is_fixed_and_contiguous() const override { return false; }
bool is_exclusive() const override {
return IColumn::is_exclusive() && nested_column->is_exclusive() &&
get_null_map_column().is_exclusive();
}
- size_t size_of_value_if_fixed() const override {
- return get_null_map_column().size_of_value_if_fixed() +
- nested_column->size_of_value_if_fixed();
- }
-
bool only_null() const override { return size() == 1 && is_null_at(0); }
// used in schema change
diff --git a/be/src/vec/columns/column_object.h
b/be/src/vec/columns/column_object.h
index 1c8f38056c9..21bb4469115 100644
--- a/be/src/vec/columns/column_object.h
+++ b/be/src/vec/columns/column_object.h
@@ -525,11 +525,6 @@ public:
throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR,
"get_raw_data" + get_name());
}
- size_t size_of_value_if_fixed() const override {
- throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR,
- "size_of_value_if_fixed" + get_name());
- }
-
StringRef get_data_at(size_t) const override {
throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, "get_data_at"
+ get_name());
}
diff --git a/be/src/vec/columns/column_vector.h
b/be/src/vec/columns/column_vector.h
index 2676d6d3444..2cb320b6992 100644
--- a/be/src/vec/columns/column_vector.h
+++ b/be/src/vec/columns/column_vector.h
@@ -373,8 +373,6 @@ public:
ColumnPtr replicate(const IColumn::Offsets& offsets) const override;
- bool is_fixed_and_contiguous() const override { return true; }
- size_t size_of_value_if_fixed() const override { return sizeof(T); }
StringRef get_raw_data() const override {
return StringRef(reinterpret_cast<const char*>(data.data()),
data.size());
}
diff --git a/be/src/vec/columns/predicate_column.h
b/be/src/vec/columns/predicate_column.h
index c2c6456d862..7e15656fe1d 100644
--- a/be/src/vec/columns/predicate_column.h
+++ b/be/src/vec/columns/predicate_column.h
@@ -376,9 +376,6 @@ public:
__builtin_unreachable();
}
- bool is_fixed_and_contiguous() const override { return true; }
- size_t size_of_value_if_fixed() const override { return sizeof(T); }
-
[[noreturn]] StringRef get_raw_data() const override {
throw doris::Exception(ErrorCode::INTERNAL_ERROR,
"get_raw_data not supported in
PredicateColumnType");
diff --git a/be/test/vec/columns/column_hash_func_test.cpp
b/be/test/vec/columns/column_hash_func_test.cpp
index c49f1e0a578..4db279b6bb2 100644
--- a/be/test/vec/columns/column_hash_func_test.cpp
+++ b/be/test/vec/columns/column_hash_func_test.cpp
@@ -71,11 +71,11 @@ TEST(HashFuncTest, ArrayTypeTest) {
DataTypePtr a = std::make_shared<DataTypeArray>(d);
ColumnPtr col_a = a->create_column_const_with_default_value(1);
// xxHash
- EXPECT_NO_FATAL_FAILURE(col_a->update_hashes_with_value(xx_hashes));
+
EXPECT_NO_FATAL_FAILURE(unpack_if_const(col_a).first->update_hashes_with_value(xx_hashes));
std::cout << xx_hashes[0] << std::endl;
// crcHash
- EXPECT_NO_FATAL_FAILURE(
- col_a->update_crcs_with_value(crc_hashes,
PrimitiveType::TYPE_ARRAY, 1));
+
EXPECT_NO_FATAL_FAILURE(unpack_if_const(col_a).first->update_crcs_with_value(
+ crc_hashes, PrimitiveType::TYPE_ARRAY, 1));
std::cout << crc_hashes[0] << std::endl;
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]