This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 4077338284 [Opt](parquet) opt the performance of date convertion (#22360) 4077338284 is described below commit 4077338284e677617902a34986e734d2d1af742a Author: HappenLee <happen...@hotmail.com> AuthorDate: Sun Jul 30 15:54:13 2023 +0800 [Opt](parquet) opt the performance of date convertion (#22360) before: ``` mysql> select count(l_commitdate) from lineitem; +---------------------+ | count(l_commitdate) | +---------------------+ | 600037902 | +---------------------+ 1 row in set (1.61 sec) ``` after: ``` mysql> select count(l_commitdate) from lineitem; +---------------------+ | count(l_commitdate) | +---------------------+ | 600037902 | +---------------------+ 1 row in set (0.86 sec) ``` --- be/src/vec/exec/format/parquet/decoder.cpp | 6 ++++++ be/src/vec/exec/format/parquet/decoder.h | 1 + be/src/vec/exec/format/parquet/fix_length_dict_decoder.hpp | 7 ++++--- be/src/vec/exec/format/parquet/fix_length_plain_decoder.cpp | 6 ++++-- be/src/vec/runtime/vdatetime_value.h | 5 ++--- 5 files changed, 17 insertions(+), 8 deletions(-) diff --git a/be/src/vec/exec/format/parquet/decoder.cpp b/be/src/vec/exec/format/parquet/decoder.cpp index a1fc3706fd..539fc04a10 100644 --- a/be/src/vec/exec/format/parquet/decoder.cpp +++ b/be/src/vec/exec/format/parquet/decoder.cpp @@ -177,5 +177,11 @@ void Decoder::init(FieldSchema* field_schema, cctz::time_zone* ctz) { _decode_params->scale_to_nano_factor = 1000; } } + + if (_decode_params->ctz) { + VecDateTimeValue t; + t.from_unixtime(0, *_decode_params->ctz); + _decode_params->offset_days = doris::calc_daynr(t.year(), t.month(), t.day()); + } } } // namespace doris::vectorized diff --git a/be/src/vec/exec/format/parquet/decoder.h b/be/src/vec/exec/format/parquet/decoder.h index aacb3730ad..6c1030818c 100644 --- a/be/src/vec/exec/format/parquet/decoder.h +++ b/be/src/vec/exec/format/parquet/decoder.h @@ -71,6 +71,7 @@ struct DecodeParams { static const 
cctz::time_zone utc0; // schema.logicalType.TIMESTAMP.isAdjustedToUTC == true, we should set the time zone cctz::time_zone* ctz = nullptr; + size_t offset_days = 0; int64_t second_mask = 1; int64_t scale_to_nano_factor = 1; DecimalScaleParams decimal_scale; diff --git a/be/src/vec/exec/format/parquet/fix_length_dict_decoder.hpp b/be/src/vec/exec/format/parquet/fix_length_dict_decoder.hpp index 887797636e..817b5e7f96 100644 --- a/be/src/vec/exec/format/parquet/fix_length_dict_decoder.hpp +++ b/be/src/vec/exec/format/parquet/fix_length_dict_decoder.hpp @@ -216,15 +216,16 @@ protected: size_t data_index = column_data.size(); column_data.resize(data_index + select_vector.num_values() - select_vector.num_filtered()); size_t dict_index = 0; + ColumnSelectVector::DataReadType read_type; while (size_t run_length = select_vector.get_next_run<has_filter>(&read_type)) { switch (read_type) { case ColumnSelectVector::CONTENT: { for (size_t i = 0; i < run_length; ++i) { - int64_t date_value = _dict_items[_indexes[dict_index++]]; + int64_t date_value = + _dict_items[_indexes[dict_index++]] + _decode_params->offset_days; auto& v = reinterpret_cast<CppType&>(column_data[data_index++]); - v.from_unixtime(date_value * 24 * 60 * 60, - *_decode_params->ctz); // day to seconds + v.get_date_from_daynr(date_value); if constexpr (std::is_same_v<CppType, VecDateTimeValue>) { // we should cast to date if using date v1. 
v.cast_to_date(); diff --git a/be/src/vec/exec/format/parquet/fix_length_plain_decoder.cpp b/be/src/vec/exec/format/parquet/fix_length_plain_decoder.cpp index 72d362fe61..940e70db79 100644 --- a/be/src/vec/exec/format/parquet/fix_length_plain_decoder.cpp +++ b/be/src/vec/exec/format/parquet/fix_length_plain_decoder.cpp @@ -248,14 +248,16 @@ Status FixLengthPlainDecoder::_decode_date(MutableColumnPtr& doris_column, size_t data_index = column_data.size(); column_data.resize(data_index + select_vector.num_values() - select_vector.num_filtered()); ColumnSelectVector::DataReadType read_type; + while (size_t run_length = select_vector.get_next_run<has_filter>(&read_type)) { switch (read_type) { case ColumnSelectVector::CONTENT: { for (size_t i = 0; i < run_length; ++i) { char* buf_start = _data->data + _offset; - int64_t date_value = static_cast<int64_t>(*reinterpret_cast<int32_t*>(buf_start)); + int64_t date_value = static_cast<int64_t>(*reinterpret_cast<int32_t*>(buf_start)) + + _decode_params->offset_days; auto& v = reinterpret_cast<CppType&>(column_data[data_index++]); - v.from_unixtime(date_value * 24 * 60 * 60, *_decode_params->ctz); // day to seconds + v.get_date_from_daynr(date_value); if constexpr (std::is_same_v<CppType, VecDateTimeValue>) { // we should cast to date if using date v1. 
v.cast_to_date(); diff --git a/be/src/vec/runtime/vdatetime_value.h b/be/src/vec/runtime/vdatetime_value.h index 639d7171cb..0abc314842 100644 --- a/be/src/vec/runtime/vdatetime_value.h +++ b/be/src/vec/runtime/vdatetime_value.h @@ -655,6 +655,8 @@ public: _type = TIME_DATETIME; } + bool get_date_from_daynr(uint64_t); + private: // Used to make sure sizeof VecDateTimeValue friend class UnusedClass; @@ -685,9 +687,6 @@ private: static uint8_t calc_week(const VecDateTimeValue& value, uint8_t mode, uint32_t* year, bool disable_lut = false); - // This is private function which modify date but modify `_type` - bool get_date_from_daynr(uint64_t); - // Helper to set max, min, zero void set_zero(int type); void set_max_time(bool neg); --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org