This is an automated email from the ASF dual-hosted git repository.

zhangstar333 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 72cffc57586 [Test] (BE-UT)Add sorter BE-UT and foundational mocks 
(#47265)
72cffc57586 is described below

commit 72cffc5758632dd9b82decda0fadcaca2af25088
Author: Mryange <yanxuech...@selectdb.com>
AuthorDate: Thu Jan 23 14:49:43 2025 +0800

    [Test] (BE-UT)Add sorter BE-UT and foundational mocks (#47265)
    
    ### What problem does this PR solve?
    1. Add BEUT tests for the sorter.
    2. Introduce some mock basic types for future development.
    3. Introduce BE_BENCHMARK because some functions are only defined in
    BE-UT, to prevent BENCHMARK compilation from failing.
---
 be/CMakeLists.txt                           |   3 +
 be/src/common/be_mock_util.h                |  42 +++++++
 be/src/pipeline/exec/exchange_sink_buffer.h |   2 +-
 be/src/runtime/descriptors.cpp              |  17 +++
 be/src/runtime/descriptors.h                |  21 +++-
 be/src/runtime/runtime_state.h              |   5 +-
 be/test/testutil/mock/mock_descriptors.h    |  59 ++++++++++
 be/test/testutil/mock/mock_runtime_state.h  |  33 ++++++
 be/test/testutil/mock/mock_slot_ref.h       |  50 ++++++++
 be/test/vec/exec/sort/sort_test.cpp         | 175 ++++++++++++++++++++++++++++
 10 files changed, 398 insertions(+), 9 deletions(-)

diff --git a/be/CMakeLists.txt b/be/CMakeLists.txt
index d476af8e211..223c98d9365 100644
--- a/be/CMakeLists.txt
+++ b/be/CMakeLists.txt
@@ -713,6 +713,9 @@ endif ()
 # use this to avoid some runtime tracker. reuse BE_TEST symbol, no need 
another.
 if (BUILD_BENCHMARK)
     add_definitions(-DBE_TEST)
+# The separate BENCHMARK marker is introduced here because 
+# some BE UTs mock certain functions, and BENCHMARK cannot find their definitions.
+    add_definitions(-DBE_BENCHMARK)
 endif()
 
 get_directory_property(COMPILER_FLAGS COMPILE_OPTIONS)
diff --git a/be/src/common/be_mock_util.h b/be/src/common/be_mock_util.h
new file mode 100644
index 00000000000..8633937b41e
--- /dev/null
+++ b/be/src/common/be_mock_util.h
@@ -0,0 +1,42 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+// #define BE_TEST
+
+#ifdef BE_TEST // BE_TEST defined: MOCK_FUNCTION expands to `virtual`, so a tagged member function can be overridden
+#define MOCK_FUNCTION virtual
+#else // BE_TEST not defined: MOCK_FUNCTION expands to nothing
+#define MOCK_FUNCTION
+#endif
+
+#ifdef BE_TEST // BE_TEST defined: MOCK_DEFINE(str) emits its argument unchanged
+#define MOCK_DEFINE(str) str
+#else // BE_TEST not defined: MOCK_DEFINE(str) drops its argument
+#define MOCK_DEFINE(str)
+#endif
+
+#ifdef BE_TEST // BE_TEST defined: MOCK_REMOVE(str) drops its argument
+#define MOCK_REMOVE(str)
+#else // BE_TEST not defined: MOCK_REMOVE(str) emits its argument unchanged
+#define MOCK_REMOVE(str) str
+#endif
+
+/*
+#include "common/be_mock_util.h"
+*/
diff --git a/be/src/pipeline/exec/exchange_sink_buffer.h 
b/be/src/pipeline/exec/exchange_sink_buffer.h
index 458c7c3f66e..80e5dc42289 100644
--- a/be/src/pipeline/exec/exchange_sink_buffer.h
+++ b/be/src/pipeline/exec/exchange_sink_buffer.h
@@ -207,7 +207,7 @@ private:
                      +-----------------+       +-----------------+    
+-----------------+
 */
 
-#ifdef BE_TEST
+#if defined(BE_TEST) && !defined(BE_BENCHMARK)
 void transmit_blockv2(PBackendService_Stub& stub,
                       std::unique_ptr<AutoReleaseClosure<PTransmitDataParams,
                                                          
ExchangeSendCallback<PTransmitDataResult>>>
diff --git a/be/src/runtime/descriptors.cpp b/be/src/runtime/descriptors.cpp
index bea11feb916..c19abf62924 100644
--- a/be/src/runtime/descriptors.cpp
+++ b/be/src/runtime/descriptors.cpp
@@ -77,6 +77,23 @@ SlotDescriptor::SlotDescriptor(const PSlotDescriptor& pdesc)
           _column_paths(pdesc.column_paths().begin(), 
pdesc.column_paths().end()),
           _is_auto_increment(pdesc.is_auto_increment()) {}
 
+#ifdef BE_TEST // this default constructor is compiled only when BE_TEST is defined
+SlotDescriptor::SlotDescriptor() // takes no thrift/protobuf input; every listed member gets a fixed placeholder value
+        : _id(0),
+          _type(TypeDescriptor {}),
+          _parent(0),
+          _col_pos(0),
+          _is_nullable(false),
+          _col_unique_id(0),
+          _col_type(PrimitiveType {}),
+          _slot_idx(0),
+          _field_idx(-1), // the only index initialised to -1 rather than 0
+          _is_materialized(false),
+          _is_key(false),
+          _need_materialize(true), // the only boolean flag initialised to true
+          _is_auto_increment(false) {}
+#endif
+
 void SlotDescriptor::to_protobuf(PSlotDescriptor* pslot) const {
     pslot->set_id(_id);
     pslot->set_parent(_parent);
diff --git a/be/src/runtime/descriptors.h b/be/src/runtime/descriptors.h
index b807c567543..3bf755ce67c 100644
--- a/be/src/runtime/descriptors.h
+++ b/be/src/runtime/descriptors.h
@@ -32,14 +32,15 @@
 #include <utility>
 #include <vector>
 
+#include "common/be_mock_util.h"
 #include "common/compiler_util.h" // IWYU pragma: keep
 #include "common/global_types.h"
+#include "common/object_pool.h"
 #include "common/status.h"
 #include "olap/utils.h"
 #include "runtime/define_primitive_type.h"
 #include "runtime/types.h"
 #include "vec/data_types/data_type.h"
-
 namespace google::protobuf {
 template <typename Element>
 class RepeatedField;
@@ -53,7 +54,7 @@ class PSlotDescriptor;
 
 class SlotDescriptor {
 public:
-    // virtual ~SlotDescriptor() {};
+    MOCK_DEFINE(virtual ~SlotDescriptor() = default;)
     SlotId id() const { return _id; }
     const TypeDescriptor& type() const { return _type; }
     TupleId parent() const { return _parent; }
@@ -74,7 +75,7 @@ public:
 
     vectorized::MutableColumnPtr get_empty_mutable_column() const;
 
-    doris::vectorized::DataTypePtr get_data_type_ptr() const;
+    MOCK_FUNCTION doris::vectorized::DataTypePtr get_data_type_ptr() const;
 
     int32_t col_unique_id() const { return _col_unique_id; }
 
@@ -131,6 +132,7 @@ private:
 
     SlotDescriptor(const TSlotDescriptor& tdesc);
     SlotDescriptor(const PSlotDescriptor& pdesc);
+    MOCK_DEFINE(SlotDescriptor();)
 };
 
 // Base class for table descriptors.
@@ -342,15 +344,18 @@ public:
     TupleDescriptor(TupleDescriptor&&) = delete;
     void operator=(const TupleDescriptor&) = delete;
 
-    ~TupleDescriptor() {
+    MOCK_DEFINE(virtual) ~TupleDescriptor() {
         if (_own_slots) {
             for (SlotDescriptor* slot : _slots) {
                 delete slot;
             }
         }
     }
+
+    MOCK_DEFINE(TupleDescriptor() : _id {0} {};)
+
     int num_materialized_slots() const { return _num_materialized_slots; }
-    const std::vector<SlotDescriptor*>& slots() const { return _slots; }
+    MOCK_FUNCTION const std::vector<SlotDescriptor*>& slots() const { return 
_slots; }
 
     bool has_varlen_slots() const { return _has_varlen_slots; }
     const TableDescriptor* table_desc() const { return _table_desc; }
@@ -461,6 +466,8 @@ public:
     // dummy descriptor, needed for the JNI EvalPredicate() function
     RowDescriptor() = default;
 
+    MOCK_DEFINE(virtual ~RowDescriptor() = default;)
+
     int num_materialized_slots() const { return _num_materialized_slots; }
 
     int num_slots() const { return _num_slots; }
@@ -474,7 +481,9 @@ public:
     bool has_varlen_slots() const { return _has_varlen_slots; }
 
     // Return descriptors for all tuples in this row, in order of appearance.
-    const std::vector<TupleDescriptor*>& tuple_descriptors() const { return 
_tuple_desc_map; }
+    MOCK_FUNCTION const std::vector<TupleDescriptor*>& tuple_descriptors() 
const {
+        return _tuple_desc_map;
+    }
 
     // Populate row_tuple_ids with our ids.
     void to_thrift(std::vector<TTupleId>* row_tuple_ids);
diff --git a/be/src/runtime/runtime_state.h b/be/src/runtime/runtime_state.h
index e365c0608b7..8a9e66fac6f 100644
--- a/be/src/runtime/runtime_state.h
+++ b/be/src/runtime/runtime_state.h
@@ -37,6 +37,7 @@
 
 #include "agent/be_exec_version_manager.h"
 #include "cctz/time_zone.h"
+#include "common/be_mock_util.h"
 #include "common/compiler_util.h" // IWYU pragma: keep
 #include "common/config.h"
 #include "common/factory_creator.h"
@@ -96,7 +97,7 @@ public:
     RuntimeState();
 
     // Empty d'tor to avoid issues with unique_ptr.
-    ~RuntimeState();
+    MOCK_DEFINE(virtual) ~RuntimeState();
 
     // Set per-query state.
     Status init(const TUniqueId& fragment_instance_id, const TQueryOptions& 
query_options,
@@ -118,7 +119,7 @@ public:
 
     const DescriptorTbl& desc_tbl() const { return *_desc_tbl; }
     void set_desc_tbl(const DescriptorTbl* desc_tbl) { _desc_tbl = desc_tbl; }
-    int batch_size() const { return _query_options.batch_size; }
+    MOCK_FUNCTION int batch_size() const { return _query_options.batch_size; }
     int wait_full_block_schedule_times() const {
         return _query_options.wait_full_block_schedule_times;
     }
diff --git a/be/test/testutil/mock/mock_descriptors.h 
b/be/test/testutil/mock/mock_descriptors.h
new file mode 100644
index 00000000000..67ab1772756
--- /dev/null
+++ b/be/test/testutil/mock/mock_descriptors.h
@@ -0,0 +1,59 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <vector>
+
+#include "runtime/descriptors.h"
+#include "vec/data_types/data_type.h"
+
+namespace doris {
+
+class MockSlotDescriptor : public SlotDescriptor { // slot descriptor whose data type is whatever is stored in `type`
+public: // the override below returns the `type` member as-is
+    doris::vectorized::DataTypePtr get_data_type_ptr() const override { return 
type; }
+    vectorized::DataTypePtr type; // no initializer here; MockRowDescriptor assigns it right after construction
+};
+
+class MockTupleDescriptor : public TupleDescriptor { // tuple descriptor whose slot list is the public `Slots` member
+public: // the override below returns `Slots` by const reference
+    const std::vector<SlotDescriptor*>& slots() const override { return Slots; 
}
+
+    std::vector<SlotDescriptor*> Slots; // raw, non-deleted pointers; filled by MockRowDescriptor's constructor
+};
+
+// Row descriptor mock that exposes exactly one MockTupleDescriptor whose slots
+// report the given data types, in the order they were passed in.
+class MockRowDescriptor : public RowDescriptor {
+public:
+    // Creates one MockSlotDescriptor per entry of `types` and a single
+    // MockTupleDescriptor referencing them. Every descriptor is handed to
+    // `pool->add(...)`, so `pool` must be non-null.
+    // NOTE(review): presumably the pool owns and later frees them - confirm.
+    MockRowDescriptor(const std::vector<vectorized::DataTypePtr>& types, ObjectPool* pool) {
+        std::vector<SlotDescriptor*> slots;
+        slots.reserve(types.size());
+        // Iterate by const reference: copying a DataTypePtr per iteration is
+        // unnecessary, only the slot itself needs its own copy.
+        for (const auto& type : types) {
+            auto* slot = pool->add(new MockSlotDescriptor());
+            slot->type = type;
+            slots.push_back(slot);
+        }
+        auto* tuple_desc = pool->add(new MockTupleDescriptor());
+        // Hand the buffer over instead of copying the whole vector.
+        tuple_desc->Slots.swap(slots);
+        tuple_desc_map.push_back(tuple_desc);
+    }
+    // Returns the single-element list built by the constructor.
+    const std::vector<TupleDescriptor*>& tuple_descriptors() const override {
+        return tuple_desc_map;
+    }
+    std::vector<TupleDescriptor*> tuple_desc_map;
+};
+
+} // namespace doris
\ No newline at end of file
diff --git a/be/test/testutil/mock/mock_runtime_state.h 
b/be/test/testutil/mock/mock_runtime_state.h
new file mode 100644
index 00000000000..3a1b4f60797
--- /dev/null
+++ b/be/test/testutil/mock/mock_runtime_state.h
@@ -0,0 +1,33 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+#include "runtime/runtime_state.h"
+
+namespace doris {
+
+class MockRuntimeState : public RuntimeState { // RuntimeState whose batch size is a plain public field
+public:
+    MockRuntimeState() = default;
+
+    int batch_size() const override { return batchSize; } // returns the batchSize member below
+
+    // default batch size
+    int batchSize = 4096; // public so a test can assign a different value
+};
+
+} // namespace doris
diff --git a/be/test/testutil/mock/mock_slot_ref.h 
b/be/test/testutil/mock/mock_slot_ref.h
new file mode 100644
index 00000000000..17df6347962
--- /dev/null
+++ b/be/test/testutil/mock/mock_slot_ref.h
@@ -0,0 +1,50 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+#include <string>
+
+#include "common/status.h"
+#include "vec/exprs/vexpr.h"
+
+namespace doris {
+class SlotDescriptor;
+class RowDescriptor;
+class RuntimeState;
+class TExprNode;
+
+namespace vectorized {
+class Block;
+class VExprContext;
+
+// Used to mock a slot-ref expression: execute() reports a fixed column index
+// as the result column and leaves the block untouched.
+class MockSlotRef final : public VExpr {
+public:
+    // `explicit` so that an int is never silently converted into an expression.
+    explicit MockSlotRef(int column_id) : _column_id(column_id) {}
+    // Writes the configured column index to *result_column_id and returns OK.
+    // `context` and `block` are not used.
+    Status execute(VExprContext* context, Block* block, int* result_column_id) override {
+        *result_column_id = _column_id;
+        return Status::OK();
+    }
+    // Always returns the fixed name "MockSlotRef".
+    const std::string& expr_name() const override { return _name; }
+
+private:
+    int _column_id;
+    const std::string _name = "MockSlotRef";
+};
+
+} // namespace vectorized
+} // namespace doris
diff --git a/be/test/vec/exec/sort/sort_test.cpp 
b/be/test/vec/exec/sort/sort_test.cpp
new file mode 100644
index 00000000000..2eb009eaec9
--- /dev/null
+++ b/be/test/vec/exec/sort/sort_test.cpp
@@ -0,0 +1,175 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gen_cpp/olap_file.pb.h>
+#include <gen_cpp/types.pb.h>
+#include <glog/logging.h>
+#include <gtest/gtest.h>
+
+#include <algorithm>
+#include <cstdint>
+#include <random>
+#include <utility>
+
+#include "common/object_pool.h"
+#include "runtime/runtime_state.h"
+#include "testutil/mock/mock_descriptors.h"
+#include "testutil/mock/mock_runtime_state.h"
+#include "testutil/mock/mock_slot_ref.h"
+#include "vec/columns/columns_number.h"
+#include "vec/common/assert_cast.h"
+#include "vec/common/sort/heap_sorter.h"
+#include "vec/common/sort/sorter.h"
+#include "vec/common/sort/topn_sorter.h"
+#include "vec/common/sort/vsort_exec_exprs.h"
+#include "vec/core/block.h"
+#include "vec/exec/format/orc/vorc_reader.h"
+namespace doris::vectorized {
+class SortTest : public testing::Test { // fixture with no state; it only names the suite used by the TEST_F cases
+public:
+    SortTest() = default;
+    ~SortTest() override = default;
+};
+
+enum class SortType { FULL_SORT, TOPN_SORT, HEAP_SORT }; // selects which Sorter SortTestParam's constructor creates
+
+// Test harness that owns one Sorter over a single Int32 column together with
+// everything that sorter is constructed from: the sort expressions, the mock
+// row descriptor and the object pool.
+class SortTestParam {
+public:
+    // Builds the sorter selected by `sort_type`. `limit` and `offset` are
+    // passed unchanged to the sorter factory; the ordering is a single
+    // ascending key on column 0 with nulls last.
+    SortTestParam(SortType sort_type, int64_t limit, int64_t offset)
+            : sort_type(sort_type), limit(limit), offset(offset) {
+        std::vector<DataTypePtr> data_types {std::make_shared<DataTypeInt32>()};
+        row_desc = std::make_unique<MockRowDescriptor>(data_types, &pool);
+
+        sort_exec_exprs._sort_tuple_slot_expr_ctxs.push_back(
+                VExprContext::create_shared(std::make_shared<MockSlotRef>(0)));
+
+        sort_exec_exprs._materialize_tuple = false;
+
+        sort_exec_exprs._lhs_ordering_expr_ctxs.push_back(
+                VExprContext::create_shared(std::make_shared<MockSlotRef>(0)));
+
+        switch (sort_type) {
+        case SortType::FULL_SORT:
+            sorter = FullSorter::create_unique(sort_exec_exprs, limit, offset, &pool, is_asc_order,
+                                               nulls_first, *row_desc, nullptr, nullptr);
+            break;
+        case SortType::TOPN_SORT:
+            sorter = TopNSorter::create_unique(sort_exec_exprs, limit, offset, &pool, is_asc_order,
+                                               nulls_first, *row_desc, nullptr, nullptr);
+            // This break is required: without it control falls through to
+            // HEAP_SORT and the TopNSorter is replaced by a HeapSorter, so
+            // TOPN_SORT would never exercise TopNSorter.
+            break;
+        case SortType::HEAP_SORT:
+            sorter = HeapSorter::create_unique(sort_exec_exprs, limit, offset, &pool, is_asc_order,
+                                               nulls_first, *row_desc);
+            break;
+        default:
+            break;
+        }
+    }
+
+    // Feeds a copy of `column` to the sorter as a one-column block.
+    void append_block(ColumnInt32::Ptr column) {
+        Block block = VectorizedUtils::create_empty_block(*row_desc, true /*ignore invalid slot*/);
+        block.get_by_position(0).column = column->clone();
+        EXPECT_TRUE(sorter->append_block(&block).ok());
+    }
+
+    // Tells the sorter that all input has been appended.
+    void prepare_for_read() { EXPECT_TRUE(sorter->prepare_for_read().ok()); }
+
+    // Drains the sorter and checks that the concatenated output equals
+    // `column` element by element. `column` must hold a ColumnInt32.
+    void check_sort_column(ColumnPtr column) {
+        MutableBlock sorted_block(VectorizedUtils::create_columns_with_type_and_name(*row_desc));
+        Block output_block;
+        bool eos = false;
+        MockRuntimeState state;
+        while (!eos) {
+            output_block.clear();
+            // Fatal on failure: after a failed get_next() `eos` may never be
+            // set, and a non-fatal check would keep this loop spinning.
+            ASSERT_TRUE(sorter->get_next(&state, &output_block, &eos).ok());
+
+            std::cout << output_block.dump_data() << std::endl;
+            EXPECT_TRUE(sorted_block.merge(std::move(output_block)).ok());
+        }
+        Block result_block = sorted_block.to_block();
+        const auto* expect_column = assert_cast<const ColumnInt32*>(column.get());
+        const auto* result_column =
+                assert_cast<const ColumnInt32*>(result_block.get_by_position(0).column.get());
+        // Fatal on failure: the loop below indexes result_column with indices
+        // taken from expect_column, which is only valid when the sizes match.
+        ASSERT_EQ(expect_column->size(), result_column->size());
+        for (size_t i = 0; i < expect_column->size(); i++) {
+            EXPECT_EQ(expect_column->get_element(i), result_column->get_element(i));
+        }
+    }
+    SortType sort_type;
+    int64_t limit;
+    int64_t offset;
+    VSortExecExprs sort_exec_exprs;
+    ObjectPool pool;
+    std::unique_ptr<MockRowDescriptor> row_desc;
+
+    // One sort key: ascending, nulls last.
+    std::vector<bool> is_asc_order {true};
+    std::vector<bool> nulls_first {false};
+
+    std::unique_ptr<vectorized::Sorter> sorter;
+}; // class SortTestParam
+
+// Returns {input, expected}. The input column holds the values 0..rows-1 in a
+// random order; the expected column holds offset..offset+limit-1 in ascending
+// order. The shuffle is seeded from std::random_device, so the input order
+// differs from run to run.
+std::pair<ColumnInt32::Ptr, ColumnInt32::Ptr> get_unsort_and_sorted_column(int64_t rows,
+                                                                           int64_t limit,
+                                                                           int64_t offset) {
+    std::vector<int32_t> unsort_data;
+
+    // Counters are int64_t like the parameters they are compared against; the
+    // narrowing to the column's element type is made explicit.
+    for (int64_t i = 0; i < rows; i++) {
+        unsort_data.push_back(static_cast<int32_t>(i));
+    }
+
+    std::vector<int32_t> sorted_data;
+    for (int64_t i = offset; i < limit + offset; i++) {
+        sorted_data.push_back(static_cast<int32_t>(i));
+    }
+    std::random_device rd;
+    std::mt19937 g(rd());
+    std::shuffle(unsort_data.begin(), unsort_data.end(), g);
+
+    auto unsort_column = ColumnInt32::create();
+    for (auto i : unsort_data) {
+        unsort_column->insert_value(i);
+    }
+    auto sorted_column = ColumnInt32::create();
+    for (auto i : sorted_data) {
+        sorted_column->insert_value(i);
+    }
+    return {std::move(unsort_column), std::move(sorted_column)};
+}
+
+void test_sort(SortType sort_type, int64_t rows, int64_t limit, int64_t 
offset) {
+    SortTestParam param(sort_type, limit, offset); // builds the sorter under test
+    auto [unsort_column, sorted_column] = get_unsort_and_sorted_column(rows, 
limit, offset);
+    param.append_block(unsort_column); // the whole shuffled input goes in as a single block
+    param.prepare_for_read();
+    param.check_sort_column(sorted_column->clone()); // compares the sorter's output with the expected column
+}
+
+TEST_F(SortTest, test_full_sort) {
+    test_sort(SortType::FULL_SORT, 100, 10, 10); // rows=100, limit=10, offset=10
+    test_sort(SortType::FULL_SORT, 1000, 10, 100); // rows=1000, limit=10, offset=100
+}
+
+TEST_F(SortTest, test_topn_sort) {
+    test_sort(SortType::TOPN_SORT, 100, 10, 10); // rows=100, limit=10, offset=10
+    test_sort(SortType::TOPN_SORT, 1000, 10, 100); // rows=1000, limit=10, offset=100
+}
+
+TEST_F(SortTest, test_heap_sort) {
+    test_sort(SortType::HEAP_SORT, 100, 10, 10); // rows=100, limit=10, offset=10
+    test_sort(SortType::HEAP_SORT, 1000, 10, 100); // rows=1000, limit=10, offset=100
+}
+
+} // namespace doris::vectorized


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to