This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 4077338284 [Opt](parquet) opt the performance of date convertion 
(#22360)
4077338284 is described below

commit 4077338284e677617902a34986e734d2d1af742a
Author: HappenLee <happen...@hotmail.com>
AuthorDate: Sun Jul 30 15:54:13 2023 +0800

    [Opt](parquet) opt the performance of date convertion (#22360)
    
    before:
    ```
    mysql>  select count(l_commitdate) from lineitem;
    +---------------------+
    | count(l_commitdate) |
    +---------------------+
    |           600037902 |
    +---------------------+
    1 row in set (1.61 sec)
    ```
    
    after:
    ```
    mysql>  select count(l_commitdate) from lineitem;
    +---------------------+
    | count(l_commitdate) |
    +---------------------+
    |           600037902 |
    +---------------------+
    1 row in set (0.86 sec)
    ```
---
 be/src/vec/exec/format/parquet/decoder.cpp                  | 6 ++++++
 be/src/vec/exec/format/parquet/decoder.h                    | 1 +
 be/src/vec/exec/format/parquet/fix_length_dict_decoder.hpp  | 7 ++++---
 be/src/vec/exec/format/parquet/fix_length_plain_decoder.cpp | 6 ++++--
 be/src/vec/runtime/vdatetime_value.h                        | 5 ++---
 5 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/be/src/vec/exec/format/parquet/decoder.cpp 
b/be/src/vec/exec/format/parquet/decoder.cpp
index a1fc3706fd..539fc04a10 100644
--- a/be/src/vec/exec/format/parquet/decoder.cpp
+++ b/be/src/vec/exec/format/parquet/decoder.cpp
@@ -177,5 +177,11 @@ void Decoder::init(FieldSchema* field_schema, 
cctz::time_zone* ctz) {
             _decode_params->scale_to_nano_factor = 1000;
         }
     }
+
+    if (_decode_params->ctz) {
+        VecDateTimeValue t;
+        t.from_unixtime(0, *_decode_params->ctz);
+        _decode_params->offset_days = doris::calc_daynr(t.year(), t.month(), 
t.day());
+    }
 }
 } // namespace doris::vectorized
diff --git a/be/src/vec/exec/format/parquet/decoder.h 
b/be/src/vec/exec/format/parquet/decoder.h
index aacb3730ad..6c1030818c 100644
--- a/be/src/vec/exec/format/parquet/decoder.h
+++ b/be/src/vec/exec/format/parquet/decoder.h
@@ -71,6 +71,7 @@ struct DecodeParams {
     static const cctz::time_zone utc0;
     // schema.logicalType.TIMESTAMP.isAdjustedToUTC == true, we should set the 
time zone
     cctz::time_zone* ctz = nullptr;
+    size_t offset_days = 0;
     int64_t second_mask = 1;
     int64_t scale_to_nano_factor = 1;
     DecimalScaleParams decimal_scale;
diff --git a/be/src/vec/exec/format/parquet/fix_length_dict_decoder.hpp 
b/be/src/vec/exec/format/parquet/fix_length_dict_decoder.hpp
index 887797636e..817b5e7f96 100644
--- a/be/src/vec/exec/format/parquet/fix_length_dict_decoder.hpp
+++ b/be/src/vec/exec/format/parquet/fix_length_dict_decoder.hpp
@@ -216,15 +216,16 @@ protected:
         size_t data_index = column_data.size();
         column_data.resize(data_index + select_vector.num_values() - 
select_vector.num_filtered());
         size_t dict_index = 0;
+
         ColumnSelectVector::DataReadType read_type;
         while (size_t run_length = 
select_vector.get_next_run<has_filter>(&read_type)) {
             switch (read_type) {
             case ColumnSelectVector::CONTENT: {
                 for (size_t i = 0; i < run_length; ++i) {
-                    int64_t date_value = _dict_items[_indexes[dict_index++]];
+                    int64_t date_value =
+                            _dict_items[_indexes[dict_index++]] + 
_decode_params->offset_days;
                     auto& v = 
reinterpret_cast<CppType&>(column_data[data_index++]);
-                    v.from_unixtime(date_value * 24 * 60 * 60,
-                                    *_decode_params->ctz); // day to seconds
+                    v.get_date_from_daynr(date_value);
                     if constexpr (std::is_same_v<CppType, VecDateTimeValue>) {
                         // we should cast to date if using date v1.
                         v.cast_to_date();
diff --git a/be/src/vec/exec/format/parquet/fix_length_plain_decoder.cpp 
b/be/src/vec/exec/format/parquet/fix_length_plain_decoder.cpp
index 72d362fe61..940e70db79 100644
--- a/be/src/vec/exec/format/parquet/fix_length_plain_decoder.cpp
+++ b/be/src/vec/exec/format/parquet/fix_length_plain_decoder.cpp
@@ -248,14 +248,16 @@ Status 
FixLengthPlainDecoder::_decode_date(MutableColumnPtr& doris_column,
     size_t data_index = column_data.size();
     column_data.resize(data_index + select_vector.num_values() - 
select_vector.num_filtered());
     ColumnSelectVector::DataReadType read_type;
+
     while (size_t run_length = 
select_vector.get_next_run<has_filter>(&read_type)) {
         switch (read_type) {
         case ColumnSelectVector::CONTENT: {
             for (size_t i = 0; i < run_length; ++i) {
                 char* buf_start = _data->data + _offset;
-                int64_t date_value = 
static_cast<int64_t>(*reinterpret_cast<int32_t*>(buf_start));
+                int64_t date_value = 
static_cast<int64_t>(*reinterpret_cast<int32_t*>(buf_start)) +
+                                     _decode_params->offset_days;
                 auto& v = 
reinterpret_cast<CppType&>(column_data[data_index++]);
-                v.from_unixtime(date_value * 24 * 60 * 60, 
*_decode_params->ctz); // day to seconds
+                v.get_date_from_daynr(date_value);
                 if constexpr (std::is_same_v<CppType, VecDateTimeValue>) {
                     // we should cast to date if using date v1.
                     v.cast_to_date();
diff --git a/be/src/vec/runtime/vdatetime_value.h 
b/be/src/vec/runtime/vdatetime_value.h
index 639d7171cb..0abc314842 100644
--- a/be/src/vec/runtime/vdatetime_value.h
+++ b/be/src/vec/runtime/vdatetime_value.h
@@ -655,6 +655,8 @@ public:
         _type = TIME_DATETIME;
     }
 
+    bool get_date_from_daynr(uint64_t);
+
 private:
     // Used to make sure sizeof VecDateTimeValue
     friend class UnusedClass;
@@ -685,9 +687,6 @@ private:
     static uint8_t calc_week(const VecDateTimeValue& value, uint8_t mode, 
uint32_t* year,
                              bool disable_lut = false);
 
-    // This is private function which modify date but modify `_type`
-    bool get_date_from_daynr(uint64_t);
-
     // Helper to set max, min, zero
     void set_zero(int type);
     void set_max_time(bool neg);


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to