This is an automated email from the ASF dual-hosted git repository.

panxiaolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new fe7ff6f113d [Opt](functions) Opt tvf number for performance regression framework (#27582)
fe7ff6f113d is described below

commit fe7ff6f113d48e76613785d0d2a392bf83b08cca
Author: zclllyybb <zhaochan...@selectdb.com>
AuthorDate: Tue Nov 28 10:43:51 2023 +0800

    [Opt](functions) Opt tvf number for performance regression framework (#27582)
    
    Opt tvf number for performance regression framework
---
 be/src/vec/columns/column_vector.h                 | 12 +++-
 .../vec/exec/data_gen_functions/vnumbers_tvf.cpp   | 67 +++++++++++-----------
 be/src/vec/exec/data_gen_functions/vnumbers_tvf.h  |  2 +-
 3 files changed, 46 insertions(+), 35 deletions(-)

diff --git a/be/src/vec/columns/column_vector.h b/be/src/vec/columns/column_vector.h
index 00a49835c6b..77df238d2aa 100644
--- a/be/src/vec/columns/column_vector.h
+++ b/be/src/vec/columns/column_vector.h
@@ -144,7 +144,7 @@ public:
     using Container = PaddedPODArray<value_type>;
 
 private:
-    ColumnVector() {}
+    ColumnVector() = default;
     ColumnVector(const size_t n) : data(n) {}
     ColumnVector(const size_t n, const value_type x) : data(n, x) {}
     ColumnVector(const ColumnVector& src) : data(src.data.begin(), 
src.data.end()) {}
@@ -177,7 +177,7 @@ public:
     size_t size() const override { return data.size(); }
 
     StringRef get_data_at(size_t n) const override {
-        return StringRef(reinterpret_cast<const char*>(&data[n]), 
sizeof(data[n]));
+        return {reinterpret_cast<const char*>(&data[n]), sizeof(data[n])};
     }
 
     void insert_from(const IColumn& src, size_t n) override {
@@ -195,6 +195,14 @@ public:
         memcpy(data.data() + old_size, data_ptr, num * sizeof(T));
     }
 
+    void insert_range_of_integer(T begin, T end) {
+        auto old_size = data.size();
+        data.resize(old_size + (end - begin));
+        for (int i = 0; i < end - begin; i++) {
+            data[old_size + i] = begin + i;
+        }
+    }
+
     void insert_date_column(const char* data_ptr, size_t num) {
         data.reserve(data.size() + num);
         constexpr size_t input_value_size = sizeof(uint24_t);
diff --git a/be/src/vec/exec/data_gen_functions/vnumbers_tvf.cpp b/be/src/vec/exec/data_gen_functions/vnumbers_tvf.cpp
index 2ac6c0fca42..4c37dc3a35e 100644
--- a/be/src/vec/exec/data_gen_functions/vnumbers_tvf.cpp
+++ b/be/src/vec/exec/data_gen_functions/vnumbers_tvf.cpp
@@ -21,14 +21,18 @@
 #include <gen_cpp/PlanNodes_types.h>
 #include <glog/logging.h>
 
+#include <algorithm>
 #include <utility>
 
 #include "common/status.h"
 #include "runtime/descriptors.h"
 #include "runtime/runtime_state.h"
 #include "vec/columns/column.h"
+#include "vec/columns/columns_number.h"
+#include "vec/common/assert_cast.h"
 #include "vec/core/block.h"
 #include "vec/core/column_with_type_and_name.h"
+#include "vec/core/types.h"
 #include "vec/data_types/data_type.h"
 
 namespace doris::vectorized {
@@ -37,45 +41,44 @@ VNumbersTVF::VNumbersTVF(TupleId tuple_id, const TupleDescriptor* tuple_desc)
         : VDataGenFunctionInf(tuple_id, tuple_desc) {}
 
 Status VNumbersTVF::get_next(RuntimeState* state, vectorized::Block* block, 
bool* eos) {
-    bool mem_reuse = block->mem_reuse();
     DCHECK(block->rows() == 0);
+    RETURN_IF_CANCELLED(state);
+    bool mem_reuse = block->mem_reuse();
+    int batch_size = state->batch_size();
     std::vector<vectorized::MutableColumnPtr> columns(_slot_num);
 
-    do {
-        for (int i = 0; i < _slot_num; ++i) {
-            if (mem_reuse) {
-                columns[i] = 
std::move(*(block->get_by_position(i).column)).mutate();
-            } else {
-                columns[i] = 
_tuple_desc->slots()[i]->get_empty_mutable_column();
-            }
+    // now only support one column for tvf numbers
+    for (int i = 0; i < _slot_num; ++i) {
+        if (mem_reuse) {
+            columns[i] = 
std::move(*(block->get_by_position(i).column)).mutate();
+        } else {
+            columns[i] = _tuple_desc->slots()[i]->get_empty_mutable_column();
         }
-        while (true) {
-            RETURN_IF_CANCELLED(state);
-            int batch_size = state->batch_size();
-            if (columns[0]->size() == batch_size) {
-                // what if batch_size < _total_numbers, should we set *eos?
-                break;
-            }
-            // if _total_numbers == 0, so we can break loop at now.
-            if (_cur_offset >= _total_numbers) {
-                *eos = true;
-                break;
-            }
-            columns[0]->insert_data(reinterpret_cast<const 
char*>(&_cur_offset),
-                                    sizeof(_cur_offset));
-            ++_cur_offset;
+
+        if (_total_numbers <= 0) [[unlikely]] {
+            *eos = true;
+            continue;
         }
-        auto n_columns = 0;
-        if (!mem_reuse) {
-            for (const auto slot_desc : _tuple_desc->slots()) {
-                
block->insert(ColumnWithTypeAndName(std::move(columns[n_columns++]),
-                                                    
slot_desc->get_data_type_ptr(),
-                                                    slot_desc->col_name()));
-            }
+        auto* column_res = assert_cast<ColumnInt64*>(columns[i].get()); 
//BIGINT
+        int64_t end_value = std::min((int64_t)(_next_number + batch_size), 
_total_numbers);
+        column_res->insert_range_of_integer(_next_number, end_value);
+        if (end_value == _total_numbers) {
+            *eos = true;
         } else {
-            columns.clear();
+            _next_number = end_value;
+        }
+    }
+
+    if (mem_reuse) {
+        columns.clear();
+    } else {
+        size_t n_columns = 0;
+        for (const auto* slot_desc : _tuple_desc->slots()) {
+            
block->insert(ColumnWithTypeAndName(std::move(columns[n_columns++]),
+                                                slot_desc->get_data_type_ptr(),
+                                                slot_desc->col_name()));
         }
-    } while (block->rows() == 0 && !(*eos));
+    }
     return Status::OK();
 }
 
diff --git a/be/src/vec/exec/data_gen_functions/vnumbers_tvf.h b/be/src/vec/exec/data_gen_functions/vnumbers_tvf.h
index f65a777051b..310571ee765 100644
--- a/be/src/vec/exec/data_gen_functions/vnumbers_tvf.h
+++ b/be/src/vec/exec/data_gen_functions/vnumbers_tvf.h
@@ -47,7 +47,7 @@ protected:
     int64_t _total_numbers;
     // Number of returned columns, actually only 1 column
     int _slot_num = 1;
-    int64_t _cur_offset = 0;
+    int64_t _next_number = 0;
 };
 
 } // namespace vectorized


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to