This is an automated email from the ASF dual-hosted git repository. dataroaring pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push: new a90684ee322 [fix](columns) fix bug found by UT and add regression test (#48554) (#48854) a90684ee322 is described below commit a90684ee32298fbd0ef1f9e60be8a95c0cb670e3 Author: TengJianPing <tengjianp...@selectdb.com> AuthorDate: Mon Apr 7 15:15:14 2025 +0800 [fix](columns) fix bug found by UT and add regression test (#48554) (#48854) Related PR: pick #48554 --- be/src/vec/columns/column_decimal.cpp | 10 ++--- be/src/vec/columns/column_string.cpp | 33 ++++++++------- be/src/vec/columns/column_string.h | 1 + be/src/vec/columns/columns_number.h | 1 - be/src/vec/core/field.cpp | 6 +-- .../data_types/serde/data_type_decimal_serde.cpp | 2 +- regression-test/data/query_p0/sort/heap_sort.csv | 7 ++++ regression-test/data/query_p0/sort/heap_sort.out | Bin 0 -> 193 bytes .../suites/query_p0/sort/heap_sort.groovy | 46 +++++++++++++++++++++ 9 files changed, 80 insertions(+), 26 deletions(-) diff --git a/be/src/vec/columns/column_decimal.cpp b/be/src/vec/columns/column_decimal.cpp index cf0193b29e1..306876a393c 100644 --- a/be/src/vec/columns/column_decimal.cpp +++ b/be/src/vec/columns/column_decimal.cpp @@ -100,12 +100,10 @@ void ColumnDecimal<T>::serialize_vec_with_null_map(std::vector<StringRef>& keys, } } else { for (size_t i = 0; i < num_rows; ++i) { - if (null_map[i] == 0) { - char* __restrict dest = const_cast<char*>(keys[i].data + +keys[i].size); - memset(dest, 0, 1); - memcpy_fixed<T>(dest + 1, (char*)&data[i]); - keys[i].size += sizeof(T) + sizeof(UInt8); - } + char* __restrict dest = const_cast<char*>(keys[i].data + +keys[i].size); + memset(dest, 0, 1); + memcpy_fixed<T>(dest + 1, (char*)&data[i]); + keys[i].size += sizeof(T) + sizeof(UInt8); } } } diff --git a/be/src/vec/columns/column_string.cpp b/be/src/vec/columns/column_string.cpp index b61e28ce3f8..734eb2be9aa 100644 --- a/be/src/vec/columns/column_string.cpp +++ b/be/src/vec/columns/column_string.cpp @@ -121,9 +121,9 @@ void ColumnStr<T>::insert_range_from_ignore_overflow(const doris::vectorized::IC const auto& src_concrete = assert_cast<const ColumnStr<T>&>(src); if (start + length > src_concrete.offsets.size()) { - throw doris::Exception( - doris::ErrorCode::INTERNAL_ERROR, - "Parameter out of bound in IColumnStr<T>::insert_range_from method."); + throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR, + "Parameter out of bound in " + "IColumnStr<T>::insert_range_from_ignore_overflow method."); } size_t nested_offset = src_concrete.offset_at(start); @@ -292,11 +292,11 @@ void ColumnStr<T>::update_crcs_with_value(uint32_t* __restrict hashes, doris::Pr template <typename T> ColumnPtr ColumnStr<T>::filter(const IColumn::Filter& filt, ssize_t result_size_hint) const { - if (offsets.size() == 0) { - return ColumnStr<T>::create(); - } - if constexpr (std::is_same_v<UInt32, T>) { + if (offsets.size() == 0) { + return ColumnStr<T>::create(); + } + auto res = ColumnStr<T>::create(); Chars& res_chars = res->chars; IColumn::Offsets& res_offsets = res->offsets; @@ -312,13 +312,13 @@ ColumnPtr ColumnStr<T>::filter(const IColumn::Filter& filt, ssize_t result_size_ template <typename T> size_t ColumnStr<T>::filter(const IColumn::Filter& filter) { - CHECK_EQ(filter.size(), offsets.size()); - if (offsets.size() == 0) { - resize(0); - return 0; - } - if constexpr (std::is_same_v<UInt32, T>) { + CHECK_EQ(filter.size(), offsets.size()); + if (offsets.size() == 0) { + resize(0); + return 0; + } + auto res = filter_arrays_impl<UInt8, IColumn::Offset>(chars, offsets, filter); sanity_check_simple(); return res; @@ -636,8 +636,11 @@ void ColumnStr<T>::compare_internal(size_t rhs_row_id, const IColumn& rhs, int n size_t end = simd::find_one(cmp_res, begin + 1); for (size_t row_id = begin; row_id < end; row_id++) { auto value_a = get_data_at(row_id); - int res = memcmp_small_allow_overflow15(value_a.data, value_a.size, cmp_base.data, - cmp_base.size); + // need to covert to unsigned char, orelse the compare semantic is not consistent + // with other member functions, e.g. get_permutation and compare_at, + // and will result wrong result. + int res = memcmp_small_allow_overflow15((Char*)value_a.data, value_a.size, + (Char*)cmp_base.data, cmp_base.size); cmp_res[row_id] = res != 0; filter[row_id] = res * direction < 0; } diff --git a/be/src/vec/columns/column_string.h b/be/src/vec/columns/column_string.h index 33afabfc576..37b9218ed7b 100644 --- a/be/src/vec/columns/column_string.h +++ b/be/src/vec/columns/column_string.h @@ -224,6 +224,7 @@ public: void insert_many_strings_without_reserve(const StringRef* strings, size_t num) { Char* data = chars.data(); size_t offset = chars.size(); + data += offset; size_t length = 0; const char* ptr = strings[0].data; diff --git a/be/src/vec/columns/columns_number.h b/be/src/vec/columns/columns_number.h index 33dbe4c8c63..9c0005e9d5d 100644 --- a/be/src/vec/columns/columns_number.h +++ b/be/src/vec/columns/columns_number.h @@ -32,7 +32,6 @@ using ColumnUInt8 = ColumnVector<UInt8>; using ColumnUInt16 = ColumnVector<UInt16>; using ColumnUInt32 = ColumnVector<UInt32>; using ColumnUInt64 = ColumnVector<UInt64>; -using ColumnUInt128 = ColumnVector<UInt128>; using ColumnInt8 = ColumnVector<Int8>; using ColumnInt16 = ColumnVector<Int16>; diff --git a/be/src/vec/core/field.cpp b/be/src/vec/core/field.cpp index e652fc2dc9e..ac92605007d 100644 --- a/be/src/vec/core/field.cpp +++ b/be/src/vec/core/field.cpp @@ -174,14 +174,14 @@ DECLARE_DECIMAL_COMPARISON(Decimal256) template <> bool decimal_equal(Decimal128V3 x, Decimal128V3 y, UInt32 xs, UInt32 ys) { - return dec_equal(Decimal128V2(x.value), Decimal128V2(y.value), xs, ys); + return dec_equal(x, y, xs, ys); } template <> bool decimal_less(Decimal128V3 x, Decimal128V3 y, UInt32 xs, UInt32 ys) { - return dec_less(Decimal128V2(x.value), Decimal128V2(y.value), xs, ys); + return dec_less(x, y, xs, ys); } template <> bool decimal_less_or_equal(Decimal128V3 x, Decimal128V3 y, UInt32 xs, UInt32 ys) { - return dec_less_or_equal(Decimal128V2(x.value), Decimal128V2(y.value), xs, ys); + return dec_less_or_equal(x, y, xs, ys); } } // namespace doris::vectorized diff --git a/be/src/vec/data_types/serde/data_type_decimal_serde.cpp b/be/src/vec/data_types/serde/data_type_decimal_serde.cpp index acb09ee773e..d98f6cae2b0 100644 --- a/be/src/vec/data_types/serde/data_type_decimal_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_decimal_serde.cpp @@ -79,7 +79,7 @@ Status DataTypeDecimalSerDe<T>::deserialize_one_cell_from_json(IColumn& column, return Status::OK(); } return Status::InvalidArgument("parse decimal fail, string: '{}', primitive type: '{}'", - std::string(rb.position(), rb.count()).c_str(), + std::string(slice.data, slice.size).c_str(), get_primitive_type()); } diff --git a/regression-test/data/query_p0/sort/heap_sort.csv b/regression-test/data/query_p0/sort/heap_sort.csv new file mode 100644 index 00000000000..8b7670cf735 --- /dev/null +++ b/regression-test/data/query_p0/sort/heap_sort.csv @@ -0,0 +1,7 @@ +1;中文测试a +9;a +3;中文测试b +2;b +5;c +6;d +1;e diff --git a/regression-test/data/query_p0/sort/heap_sort.out b/regression-test/data/query_p0/sort/heap_sort.out new file mode 100644 index 00000000000..6d8b24cc800 Binary files /dev/null and b/regression-test/data/query_p0/sort/heap_sort.out differ diff --git a/regression-test/suites/query_p0/sort/heap_sort.groovy b/regression-test/suites/query_p0/sort/heap_sort.groovy new file mode 100644 index 00000000000..aa29a9893fa --- /dev/null +++ b/regression-test/suites/query_p0/sort/heap_sort.groovy @@ -0,0 +1,46 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// The cases is copied from https://github.com/trinodb/trino/tree/master +// /testing/trino-product-tests/src/main/resources/sql-tests/testcases/aggregate +// and modified by Doris. + +suite("heap_sort") { + sql """ + drop table if exists test_heap_sort; + """ + sql """ + create table if not exists test_heap_sort ( + f1 int, + f2 varchar(65533) + ) properties("replication_num"="1", "disable_auto_compaction"="true"); + """ + streamLoad { + table "test_heap_sort" + + set 'column_separator', ';' + + file 'heap_sort.csv' + + time 10000 // limit inflight 10s + } + sql """ set parallel_pipeline_task_num = 1; """ + sql """ set batch_size = 3; """ + sql """ set force_sort_algorithm = "heap"; """ + qt_select_1 "select * from test_heap_sort order by f1, f2;" + qt_select_2 "select * from test_heap_sort order by f2 limit 3;" +} \ No newline at end of file --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org