This is an automated email from the ASF dual-hosted git repository. panxiaolei pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new fe7ff6f113d [Opt](functions) Opt tvf number for performance regression framework (#27582) fe7ff6f113d is described below commit fe7ff6f113d48e76613785d0d2a392bf83b08cca Author: zclllyybb <zhaochan...@selectdb.com> AuthorDate: Tue Nov 28 10:43:51 2023 +0800 [Opt](functions) Opt tvf number for performance regression framework (#27582) Opt tvf number for performance regression framework --- be/src/vec/columns/column_vector.h | 12 +++- .../vec/exec/data_gen_functions/vnumbers_tvf.cpp | 67 +++++++++++----------- be/src/vec/exec/data_gen_functions/vnumbers_tvf.h | 2 +- 3 files changed, 46 insertions(+), 35 deletions(-) diff --git a/be/src/vec/columns/column_vector.h b/be/src/vec/columns/column_vector.h index 00a49835c6b..77df238d2aa 100644 --- a/be/src/vec/columns/column_vector.h +++ b/be/src/vec/columns/column_vector.h @@ -144,7 +144,7 @@ public: using Container = PaddedPODArray<value_type>; private: - ColumnVector() {} + ColumnVector() = default; ColumnVector(const size_t n) : data(n) {} ColumnVector(const size_t n, const value_type x) : data(n, x) {} ColumnVector(const ColumnVector& src) : data(src.data.begin(), src.data.end()) {} @@ -177,7 +177,7 @@ public: size_t size() const override { return data.size(); } StringRef get_data_at(size_t n) const override { - return StringRef(reinterpret_cast<const char*>(&data[n]), sizeof(data[n])); + return {reinterpret_cast<const char*>(&data[n]), sizeof(data[n])}; } void insert_from(const IColumn& src, size_t n) override { @@ -195,6 +195,14 @@ public: memcpy(data.data() + old_size, data_ptr, num * sizeof(T)); } + void insert_range_of_integer(T begin, T end) { + auto old_size = data.size(); + data.resize(old_size + (end - begin)); + for (int i = 0; i < end - begin; i++) { + data[old_size + i] = begin + i; + } + } + void insert_date_column(const char* data_ptr, size_t num) { data.reserve(data.size() + num); constexpr size_t input_value_size = sizeof(uint24_t); diff --git a/be/src/vec/exec/data_gen_functions/vnumbers_tvf.cpp b/be/src/vec/exec/data_gen_functions/vnumbers_tvf.cpp index 2ac6c0fca42..4c37dc3a35e 100644 --- a/be/src/vec/exec/data_gen_functions/vnumbers_tvf.cpp +++ b/be/src/vec/exec/data_gen_functions/vnumbers_tvf.cpp @@ -21,14 +21,18 @@ #include <gen_cpp/PlanNodes_types.h> #include <glog/logging.h> +#include <algorithm> #include <utility> #include "common/status.h" #include "runtime/descriptors.h" #include "runtime/runtime_state.h" #include "vec/columns/column.h" +#include "vec/columns/columns_number.h" +#include "vec/common/assert_cast.h" #include "vec/core/block.h" #include "vec/core/column_with_type_and_name.h" +#include "vec/core/types.h" #include "vec/data_types/data_type.h" namespace doris::vectorized { @@ -37,45 +41,44 @@ VNumbersTVF::VNumbersTVF(TupleId tuple_id, const TupleDescriptor* tuple_desc) : VDataGenFunctionInf(tuple_id, tuple_desc) {} Status VNumbersTVF::get_next(RuntimeState* state, vectorized::Block* block, bool* eos) { - bool mem_reuse = block->mem_reuse(); DCHECK(block->rows() == 0); + RETURN_IF_CANCELLED(state); + bool mem_reuse = block->mem_reuse(); + int batch_size = state->batch_size(); std::vector<vectorized::MutableColumnPtr> columns(_slot_num); - do { - for (int i = 0; i < _slot_num; ++i) { - if (mem_reuse) { - columns[i] = std::move(*(block->get_by_position(i).column)).mutate(); - } else { - columns[i] = _tuple_desc->slots()[i]->get_empty_mutable_column(); - } + // now only support one column for tvf numbers + for (int i = 0; i < _slot_num; ++i) { + if (mem_reuse) { + columns[i] = std::move(*(block->get_by_position(i).column)).mutate(); + } else { + columns[i] = _tuple_desc->slots()[i]->get_empty_mutable_column(); } - while (true) { - RETURN_IF_CANCELLED(state); - int batch_size = state->batch_size(); - if (columns[0]->size() == batch_size) { - // what if batch_size < _total_numbers, should we set *eos? - break; - } - // if _total_numbers == 0, so we can break loop at now. - if (_cur_offset >= _total_numbers) { - *eos = true; - break; - } - columns[0]->insert_data(reinterpret_cast<const char*>(&_cur_offset), - sizeof(_cur_offset)); - ++_cur_offset; + + if (_total_numbers <= 0) [[unlikely]] { + *eos = true; + continue; } - auto n_columns = 0; - if (!mem_reuse) { - for (const auto slot_desc : _tuple_desc->slots()) { - block->insert(ColumnWithTypeAndName(std::move(columns[n_columns++]), - slot_desc->get_data_type_ptr(), - slot_desc->col_name())); - } + auto* column_res = assert_cast<ColumnInt64*>(columns[i].get()); //BIGINT + int64_t end_value = std::min((int64_t)(_next_number + batch_size), _total_numbers); + column_res->insert_range_of_integer(_next_number, end_value); + if (end_value == _total_numbers) { + *eos = true; } else { - columns.clear(); + _next_number = end_value; + } + } + + if (mem_reuse) { + columns.clear(); + } else { + size_t n_columns = 0; + for (const auto* slot_desc : _tuple_desc->slots()) { + block->insert(ColumnWithTypeAndName(std::move(columns[n_columns++]), + slot_desc->get_data_type_ptr(), + slot_desc->col_name())); } - } while (block->rows() == 0 && !(*eos)); + } return Status::OK(); } diff --git a/be/src/vec/exec/data_gen_functions/vnumbers_tvf.h b/be/src/vec/exec/data_gen_functions/vnumbers_tvf.h index f65a777051b..310571ee765 100644 --- a/be/src/vec/exec/data_gen_functions/vnumbers_tvf.h +++ b/be/src/vec/exec/data_gen_functions/vnumbers_tvf.h @@ -47,7 +47,7 @@ protected: int64_t _total_numbers; // Number of returned columns, actually only 1 column int _slot_num = 1; - int64_t _cur_offset = 0; + int64_t _next_number = 0; }; } // namespace vectorized --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org