This is an automated email from the ASF dual-hosted git repository. zhangstar333 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 72cffc57586 [Test] (BE-UT)Add sorter BE-UT and foundational mocks (#47265) 72cffc57586 is described below commit 72cffc5758632dd9b82decda0fadcaca2af25088 Author: Mryange <yanxuech...@selectdb.com> AuthorDate: Thu Jan 23 14:49:43 2025 +0800 [Test] (BE-UT)Add sorter BE-UT and foundational mocks (#47265) ### What problem does this PR solve? 1. Add BEUT tests for the sorter. 2. Introduce some mock basic types for future development. 3. Introduce BE_BENCHMARK because some functions are only defined in BE-UT, to prevent BENCHMARK compilation from failing. --- be/CMakeLists.txt | 3 + be/src/common/be_mock_util.h | 42 +++++++ be/src/pipeline/exec/exchange_sink_buffer.h | 2 +- be/src/runtime/descriptors.cpp | 17 +++ be/src/runtime/descriptors.h | 21 +++- be/src/runtime/runtime_state.h | 5 +- be/test/testutil/mock/mock_descriptors.h | 59 ++++++++++ be/test/testutil/mock/mock_runtime_state.h | 33 ++++++ be/test/testutil/mock/mock_slot_ref.h | 50 ++++++++ be/test/vec/exec/sort/sort_test.cpp | 175 ++++++++++++++++++++++++++++ 10 files changed, 398 insertions(+), 9 deletions(-) diff --git a/be/CMakeLists.txt b/be/CMakeLists.txt index d476af8e211..223c98d9365 100644 --- a/be/CMakeLists.txt +++ b/be/CMakeLists.txt @@ -713,6 +713,9 @@ endif () # use this to avoid some runtime tracker. reuse BE_TEST symbol, no need another. if (BUILD_BENCHMARK) add_definitions(-DBE_TEST) +# The separate BENCHMARK marker is introduced here because +# some BE UTs mock certain functions, and BENCHMARK cannot find their definitions. + add_definitions(-DBE_BENCHMARK) endif() get_directory_property(COMPILER_FLAGS COMPILE_OPTIONS) diff --git a/be/src/common/be_mock_util.h b/be/src/common/be_mock_util.h new file mode 100644 index 00000000000..8633937b41e --- /dev/null +++ b/be/src/common/be_mock_util.h @@ -0,0 +1,42 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +// Macros whose expansion depends on whether BE_TEST is defined. +// They are used on declarations so that a test build can see a +// different class shape than a non-test build. + +// #define BE_TEST + +// MOCK_FUNCTION: prefix for a member function declaration. Expands to +// `virtual` when BE_TEST is defined and to nothing otherwise. +#ifdef BE_TEST +#define MOCK_FUNCTION virtual +#else +#define MOCK_FUNCTION +#endif + +// MOCK_DEFINE(str): expands to `str` when BE_TEST is defined and to +// nothing otherwise. +#ifdef BE_TEST +#define MOCK_DEFINE(str) str +#else +#define MOCK_DEFINE(str) +#endif + +// MOCK_REMOVE(str): the inverse of MOCK_DEFINE. Expands to nothing when +// BE_TEST is defined and to `str` otherwise. +#ifdef BE_TEST +#define MOCK_REMOVE(str) +#else +#define MOCK_REMOVE(str) str +#endif + +/* +#include "common/be_mock_util.h" +*/ diff --git a/be/src/pipeline/exec/exchange_sink_buffer.h b/be/src/pipeline/exec/exchange_sink_buffer.h index 458c7c3f66e..80e5dc42289 100644 --- a/be/src/pipeline/exec/exchange_sink_buffer.h +++ b/be/src/pipeline/exec/exchange_sink_buffer.h @@ -207,7 +207,7 @@ private: +-----------------+ +-----------------+ +-----------------+ */ -#ifdef BE_TEST +#if defined(BE_TEST) && !defined(BE_BENCHMARK) void transmit_blockv2(PBackendService_Stub& stub, std::unique_ptr<AutoReleaseClosure<PTransmitDataParams, ExchangeSendCallback<PTransmitDataResult>>> diff --git a/be/src/runtime/descriptors.cpp b/be/src/runtime/descriptors.cpp index bea11feb916..c19abf62924 100644 --- a/be/src/runtime/descriptors.cpp +++ b/be/src/runtime/descriptors.cpp @@ -77,6 +77,23 @@ SlotDescriptor::SlotDescriptor(const PSlotDescriptor& pdesc) _column_paths(pdesc.column_paths().begin(), pdesc.column_paths().end()), _is_auto_increment(pdesc.is_auto_increment()) {} +#ifdef 
BE_TEST +SlotDescriptor::SlotDescriptor() + : _id(0), + _type(TypeDescriptor {}), + _parent(0), + _col_pos(0), + _is_nullable(false), + _col_unique_id(0), + _col_type(PrimitiveType {}), + _slot_idx(0), + _field_idx(-1), + _is_materialized(false), + _is_key(false), + _need_materialize(true), + _is_auto_increment(false) {} +#endif + void SlotDescriptor::to_protobuf(PSlotDescriptor* pslot) const { pslot->set_id(_id); pslot->set_parent(_parent); diff --git a/be/src/runtime/descriptors.h b/be/src/runtime/descriptors.h index b807c567543..3bf755ce67c 100644 --- a/be/src/runtime/descriptors.h +++ b/be/src/runtime/descriptors.h @@ -32,14 +32,15 @@ #include <utility> #include <vector> +#include "common/be_mock_util.h" #include "common/compiler_util.h" // IWYU pragma: keep #include "common/global_types.h" +#include "common/object_pool.h" #include "common/status.h" #include "olap/utils.h" #include "runtime/define_primitive_type.h" #include "runtime/types.h" #include "vec/data_types/data_type.h" - namespace google::protobuf { template <typename Element> class RepeatedField; @@ -53,7 +54,7 @@ class PSlotDescriptor; class SlotDescriptor { public: - // virtual ~SlotDescriptor() {}; + MOCK_DEFINE(virtual ~SlotDescriptor() = default;) SlotId id() const { return _id; } const TypeDescriptor& type() const { return _type; } TupleId parent() const { return _parent; } @@ -74,7 +75,7 @@ public: vectorized::MutableColumnPtr get_empty_mutable_column() const; - doris::vectorized::DataTypePtr get_data_type_ptr() const; + MOCK_FUNCTION doris::vectorized::DataTypePtr get_data_type_ptr() const; int32_t col_unique_id() const { return _col_unique_id; } @@ -131,6 +132,7 @@ private: SlotDescriptor(const TSlotDescriptor& tdesc); SlotDescriptor(const PSlotDescriptor& pdesc); + MOCK_DEFINE(SlotDescriptor();) }; // Base class for table descriptors. 
@@ -342,15 +344,18 @@ public: TupleDescriptor(TupleDescriptor&&) = delete; void operator=(const TupleDescriptor&) = delete; - ~TupleDescriptor() { + MOCK_DEFINE(virtual) ~TupleDescriptor() { if (_own_slots) { for (SlotDescriptor* slot : _slots) { delete slot; } } } + + MOCK_DEFINE(TupleDescriptor() : _id {0} {};) + int num_materialized_slots() const { return _num_materialized_slots; } - const std::vector<SlotDescriptor*>& slots() const { return _slots; } + MOCK_FUNCTION const std::vector<SlotDescriptor*>& slots() const { return _slots; } bool has_varlen_slots() const { return _has_varlen_slots; } const TableDescriptor* table_desc() const { return _table_desc; } @@ -461,6 +466,8 @@ public: // dummy descriptor, needed for the JNI EvalPredicate() function RowDescriptor() = default; + MOCK_DEFINE(virtual ~RowDescriptor() = default;) + int num_materialized_slots() const { return _num_materialized_slots; } int num_slots() const { return _num_slots; } @@ -474,7 +481,9 @@ public: bool has_varlen_slots() const { return _has_varlen_slots; } // Return descriptors for all tuples in this row, in order of appearance. - const std::vector<TupleDescriptor*>& tuple_descriptors() const { return _tuple_desc_map; } + MOCK_FUNCTION const std::vector<TupleDescriptor*>& tuple_descriptors() const { + return _tuple_desc_map; + } // Populate row_tuple_ids with our ids. void to_thrift(std::vector<TTupleId>* row_tuple_ids); diff --git a/be/src/runtime/runtime_state.h b/be/src/runtime/runtime_state.h index e365c0608b7..8a9e66fac6f 100644 --- a/be/src/runtime/runtime_state.h +++ b/be/src/runtime/runtime_state.h @@ -37,6 +37,7 @@ #include "agent/be_exec_version_manager.h" #include "cctz/time_zone.h" +#include "common/be_mock_util.h" #include "common/compiler_util.h" // IWYU pragma: keep #include "common/config.h" #include "common/factory_creator.h" @@ -96,7 +97,7 @@ public: RuntimeState(); // Empty d'tor to avoid issues with unique_ptr. 
- ~RuntimeState(); + MOCK_DEFINE(virtual) ~RuntimeState(); // Set per-query state. Status init(const TUniqueId& fragment_instance_id, const TQueryOptions& query_options, @@ -118,7 +119,7 @@ public: const DescriptorTbl& desc_tbl() const { return *_desc_tbl; } void set_desc_tbl(const DescriptorTbl* desc_tbl) { _desc_tbl = desc_tbl; } - int batch_size() const { return _query_options.batch_size; } + MOCK_FUNCTION int batch_size() const { return _query_options.batch_size; } int wait_full_block_schedule_times() const { return _query_options.wait_full_block_schedule_times; } diff --git a/be/test/testutil/mock/mock_descriptors.h b/be/test/testutil/mock/mock_descriptors.h new file mode 100644 index 00000000000..67ab1772756 --- /dev/null +++ b/be/test/testutil/mock/mock_descriptors.h @@ -0,0 +1,59 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+
+#pragma once
+
+#include <utility>
+#include <vector>
+
+#include "common/object_pool.h"
+#include "runtime/descriptors.h"
+#include "vec/data_types/data_type.h"
+
+namespace doris {
+
+// Slot descriptor whose data type is supplied directly by the test through
+// the public `type` field.
+class MockSlotDescriptor : public SlotDescriptor {
+public:
+    // Returns the value currently stored in `type`.
+    doris::vectorized::DataTypePtr get_data_type_ptr() const override { return type; }
+
+    // Data type reported for this slot; assigned by the test.
+    vectorized::DataTypePtr type;
+};
+
+// Tuple descriptor that reports the slots stored in the public `Slots` field.
+class MockTupleDescriptor : public TupleDescriptor {
+public:
+    // Returns a reference to `Slots`.
+    const std::vector<SlotDescriptor*>& slots() const override { return Slots; }
+
+    // Slots reported by slots(). The pointers are stored as-is; this class
+    // does not allocate them.
+    std::vector<SlotDescriptor*> Slots;
+};
+
+// Row descriptor made of exactly one mock tuple that has one mock slot per
+// requested data type.
+class MockRowDescriptor : public RowDescriptor {
+public:
+    // Creates one MockSlotDescriptor per entry of `types` (same order) and a
+    // single MockTupleDescriptor holding them. Every descriptor is allocated
+    // with `new` and handed to `pool->add`, so `pool` must not be null.
+    //
+    // `types` is taken by const reference and iterated by const reference so
+    // that neither the vector nor its shared pointers are copied needlessly.
+    MockRowDescriptor(const std::vector<vectorized::DataTypePtr>& types, ObjectPool* pool) {
+        std::vector<SlotDescriptor*> slots;
+        slots.reserve(types.size());
+        for (const auto& type : types) {
+            auto* slot = pool->add(new MockSlotDescriptor());
+            slot->type = type;
+            slots.push_back(slot);
+        }
+        auto* tuple_desc = pool->add(new MockTupleDescriptor());
+        // `slots` is not used afterwards, so hand its storage over.
+        tuple_desc->Slots = std::move(slots);
+        tuple_desc_map.push_back(tuple_desc);
+    }
+
+    // Returns a reference to `tuple_desc_map`.
+    const std::vector<TupleDescriptor*>& tuple_descriptors() const override {
+        return tuple_desc_map;
+    }
+
+    // Tuples reported by tuple_descriptors(); filled by the constructor.
+    std::vector<TupleDescriptor*> tuple_desc_map;
+};
+
+} // namespace doris
\ No newline at end of file
diff --git a/be/test/testutil/mock/mock_runtime_state.h b/be/test/testutil/mock/mock_runtime_state.h
new file mode 100644
index 00000000000..3a1b4f60797
--- /dev/null
+++ b/be/test/testutil/mock/mock_runtime_state.h
@@ -0,0 +1,33 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.
You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+#include "runtime/runtime_state.h"
+
+namespace doris {
+
+// RuntimeState for unit tests: batch_size() is answered from the public
+// `batchSize` field.
+class MockRuntimeState : public RuntimeState {
+public:
+    // Value returned by batch_size(); 4096 unless a test overwrites it.
+    int batchSize = 4096;
+
+    MockRuntimeState() = default;
+
+    // Returns the current value of `batchSize`.
+    int batch_size() const override {
+        return batchSize;
+    }
+};
+
+} // namespace doris
diff --git a/be/test/testutil/mock/mock_slot_ref.h b/be/test/testutil/mock/mock_slot_ref.h
new file mode 100644
index 00000000000..17df6347962
--- /dev/null
+++ b/be/test/testutil/mock/mock_slot_ref.h
@@ -0,0 +1,50 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+#include <string>
+
+#include "common/status.h"
+#include "vec/exprs/vexpr.h"
+
+namespace doris {
+class SlotDescriptor;
+class RowDescriptor;
+class RuntimeState;
+class TExprNode;
+
+namespace vectorized {
+class Block;
+class VExprContext;
+
+// Used to mock a slot ref expr: execute() does no computation and only
+// reports the column index given at construction.
+class MockSlotRef final : public VExpr {
+public:
+    // `column_id` is the value execute() writes to `*result_column_id`.
+    // The constructor is explicit so that a plain int cannot silently
+    // convert into an expression node.
+    explicit MockSlotRef(int column_id) : _column_id(column_id) {}
+
+    // Stores the configured column id in `*result_column_id` and returns
+    // Status::OK(). `context` and `block` are not used.
+    Status execute(VExprContext* context, Block* block, int* result_column_id) override {
+        *result_column_id = _column_id;
+        return Status::OK();
+    }
+
+    // Returns the fixed name "MockSlotRef".
+    const std::string& expr_name() const override { return _name; }
+
+private:
+    // Column index reported by execute().
+    int _column_id;
+    // Backing storage for expr_name(), which returns a reference.
+    const std::string _name = "MockSlotRef";
+};
+
+} // namespace vectorized
+} // namespace doris
diff --git a/be/test/vec/exec/sort/sort_test.cpp b/be/test/vec/exec/sort/sort_test.cpp
new file mode 100644
index 00000000000..2eb009eaec9
--- /dev/null
+++ b/be/test/vec/exec/sort/sort_test.cpp
@@ -0,0 +1,175 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gen_cpp/olap_file.pb.h>
+#include <gen_cpp/types.pb.h>
+#include <glog/logging.h>
+#include <gtest/gtest.h>
+
+#include <algorithm>
+#include <cstddef>
+#include <cstdint>
+#include <iostream>
+#include <memory>
+#include <numeric>
+#include <random>
+#include <utility>
+#include <vector>
+
+#include "common/object_pool.h"
+#include "runtime/runtime_state.h"
+#include "testutil/mock/mock_descriptors.h"
+#include "testutil/mock/mock_runtime_state.h"
+#include "testutil/mock/mock_slot_ref.h"
+#include "vec/columns/columns_number.h"
+#include "vec/common/assert_cast.h"
+#include "vec/common/sort/heap_sorter.h"
+#include "vec/common/sort/sorter.h"
+#include "vec/common/sort/topn_sorter.h"
+#include "vec/common/sort/vsort_exec_exprs.h"
+#include "vec/core/block.h"
+#include "vec/exec/format/orc/vorc_reader.h"
+namespace doris::vectorized {
+// Empty fixture; it only gives the sorter tests a common suite name.
+class SortTest : public testing::Test {
+public:
+    SortTest() = default;
+    ~SortTest() override = default;
+};
+
+// Selects which Sorter implementation SortTestParam constructs.
+enum class SortType { FULL_SORT, TOPN_SORT, HEAP_SORT }; // enum class SortType
+
+// Owns one sorter together with everything it is constructed from: a mock
+// row descriptor with a single Int32 column, sort expressions that refer to
+// column 0, and the ordering flags (`is_asc_order` {true}, `nulls_first`
+// {false}).
+class SortTestParam {
+public:
+    // Builds the descriptors and expressions, then creates the sorter that
+    // matches `sort_type` with the given `limit` and `offset`.
+    SortTestParam(SortType sort_type, int64_t limit, int64_t offset)
+            : sort_type(sort_type), limit(limit), offset(offset) {
+        std::vector<DataTypePtr> data_types {std::make_shared<DataTypeInt32>()};
+        row_desc = std::make_unique<MockRowDescriptor>(data_types, &pool);
+
+        sort_exec_exprs._sort_tuple_slot_expr_ctxs.push_back(
+                VExprContext::create_shared(std::make_shared<MockSlotRef>(0)));
+
+        sort_exec_exprs._materialize_tuple = false;
+
+        sort_exec_exprs._lhs_ordering_expr_ctxs.push_back(
+                VExprContext::create_shared(std::make_shared<MockSlotRef>(0)));
+
+        switch (sort_type) {
+        case SortType::FULL_SORT:
+            sorter = FullSorter::create_unique(sort_exec_exprs, limit, offset, &pool, is_asc_order,
+                                               nulls_first, *row_desc, nullptr, nullptr);
+            break;
+        case SortType::TOPN_SORT:
+            sorter = TopNSorter::create_unique(sort_exec_exprs, limit, offset, &pool, is_asc_order,
+                                               nulls_first, *row_desc, nullptr, nullptr);
+            // Without this break the case fell through and the TopNSorter was
+            // immediately replaced by a HeapSorter, so TopN was never tested.
+            break;
+        case SortType::HEAP_SORT:
+            sorter = HeapSorter::create_unique(sort_exec_exprs, limit, offset, &pool, is_asc_order,
+                                               nulls_first, *row_desc);
+            break;
+        default:
+            break;
+        }
+    }
+
+    // Creates an empty block from `row_desc`, replaces column 0 with a clone
+    // of `column`, and appends the block to the sorter.
+    void append_block(ColumnInt32::Ptr column) {
+        Block block = VectorizedUtils::create_empty_block(*row_desc, true /*ignore invalid slot*/);
+        block.get_by_position(0).column = column->clone();
+        EXPECT_TRUE(sorter->append_block(&block).ok());
+    }
+
+    // Calls prepare_for_read() on the sorter and expects an OK status.
+    void prepare_for_read() { EXPECT_TRUE(sorter->prepare_for_read().ok()); }
+
+    // Reads the sorter until it reports eos, merges all output blocks, and
+    // compares column 0 of the result element by element with `column`.
+    void check_sort_column(ColumnPtr column) {
+        MutableBlock sorted_block(VectorizedUtils::create_columns_with_type_and_name(*row_desc));
+        Block output_block;
+        bool eos = false;
+        MockRuntimeState state;
+        while (!eos) {
+            output_block.clear();
+            // ASSERT rather than EXPECT: if get_next() fails, `eos` may never
+            // be set and the loop would not terminate.
+            ASSERT_TRUE(sorter->get_next(&state, &output_block, &eos).ok());
+
+            std::cout << output_block.dump_data() << std::endl;
+            ASSERT_TRUE(sorted_block.merge(std::move(output_block)).ok());
+        }
+        Block result_block = sorted_block.to_block();
+        const auto* expected_column = assert_cast<const ColumnInt32*>(column.get());
+        const auto* result_column =
+                assert_cast<const ColumnInt32*>(result_block.get_by_position(0).column.get());
+        // ASSERT rather than EXPECT: the loop below indexes both columns with
+        // the expected size, so a mismatch must stop the check here.
+        ASSERT_EQ(expected_column->size(), result_column->size());
+        for (size_t i = 0; i < expected_column->size(); ++i) {
+            EXPECT_EQ(expected_column->get_element(i), result_column->get_element(i));
+        }
+    }
+    SortType sort_type;
+    int64_t limit;
+    int64_t offset;
+    VSortExecExprs sort_exec_exprs;
+    ObjectPool pool;
+    std::unique_ptr<MockRowDescriptor> row_desc;
+
+    std::vector<bool> is_asc_order {true};
+    std::vector<bool> nulls_first {false};
+
+    std::unique_ptr<vectorized::Sorter> sorter;
+}; // class SortTestParam
+
+// Returns {input, expected}:
+//  - input:    the values 0 .. rows-1 in a random order (seeded from
+//              std::random_device, so the order differs between runs);
+//  - expected: the values offset .. offset+limit-1 in ascending order.
+// The expected column is only meaningful when rows >= offset + limit, which
+// holds for every call in this file.
+std::pair<ColumnInt32::Ptr, ColumnInt32::Ptr> get_unsort_and_sorted_column(int64_t rows,
+                                                                           int64_t limit,
+                                                                           int64_t offset) {
+    std::vector<int32_t> unsort_data(static_cast<size_t>(rows));
+    std::iota(unsort_data.begin(), unsort_data.end(), 0);
+
+    std::random_device rd;
+    std::mt19937 g(rd());
+    std::shuffle(unsort_data.begin(), unsort_data.end(), g);
+
+    auto unsort_column = ColumnInt32::create();
+    for (const int32_t value : unsort_data) {
+        unsort_column->insert_value(value);
+    }
+
+    auto sorted_column = ColumnInt32::create();
+    // Iterate in int64_t (the parameter type) and narrow explicitly per value.
+    for (int64_t value = offset; value < limit + offset; ++value) {
+        sorted_column->insert_value(static_cast<int32_t>(value));
+    }
+    return {std::move(unsort_column), std::move(sorted_column)};
+}
+
+// Feeds `rows` shuffled values into a sorter of the given type and checks
+// that reading it back yields the `limit` values that follow the first
+// `offset` values in ascending order.
+void test_sort(SortType sort_type, int64_t rows, int64_t limit, int64_t offset) {
+    SortTestParam param(sort_type, limit, offset);
+    auto [unsort_column, sorted_column] = get_unsort_and_sorted_column(rows, limit, offset);
+    param.append_block(unsort_column);
+    param.prepare_for_read();
+    param.check_sort_column(sorted_column->clone());
+}
+
+TEST_F(SortTest, test_full_sort) {
+    test_sort(SortType::FULL_SORT, 100, 10, 10);
+    test_sort(SortType::FULL_SORT, 1000, 10, 100);
+}
+
+TEST_F(SortTest, test_topn_sort) {
+    test_sort(SortType::TOPN_SORT, 100, 10, 10);
+    test_sort(SortType::TOPN_SORT, 1000, 10, 100);
+}
+
+TEST_F(SortTest, test_heap_sort) {
+    test_sort(SortType::HEAP_SORT, 100, 10, 10);
+    test_sort(SortType::HEAP_SORT, 1000, 10, 100);
+}
+
+} // namespace doris::vectorized
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org