This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 2e72603618c [fix](variant) preserve TIMESTAMPTZ values in sparse path 
(#63522)
2e72603618c is described below

commit 2e72603618ca5ce998184bcc95285bf30405f5e0
Author: Chenyang Sun <[email protected]>
AuthorDate: Wed May 27 20:51:39 2026 +0800

    [fix](variant) preserve TIMESTAMPTZ values in sparse path (#63522)
    
    Add the missing write_one_cell_to_binary override mirroring
    DataTypeDateTimeV2SerDe so the writer also emits the scale byte. Reader
    is already correct.
    
    
    ### What problem does this PR solve?
    
    Issue Number: close #xxx
    
    Related PR: #xxx
    
    Problem Summary:
    
    ### Release note
    
    None
    
    ### Check List (For Author)
    
    - Test <!-- At least one of them must be included. -->
        - [x] Regression test
        - [x] Unit Test
        - [ ] Manual test (add detailed scripts or steps below)
        - [ ] No need to test or manual test. Explain why:
    - [ ] This is a refactor/code format and no logic has been changed.
            - [ ] Previous test can cover this change.
            - [ ] No code files have been changed.
            - [ ] Other reason <!-- Add your reason?  -->
    
    - Behavior changed:
        - [ ] No.
        - [ ] Yes. <!-- Explain the behavior change -->
    
    - Does this need documentation?
        - [ ] No.
    - [ ] Yes. <!-- Add document PR link here. eg:
    https://github.com/apache/doris-website/pull/1214 -->
    
    ### Check List (For Reviewer who merge this PR)
    
    - [ ] Confirm the release note
    - [ ] Confirm test cases
    - [ ] Confirm document
    - [ ] Add branch pick label <!-- Add branch pick label that this PR
    should merge into -->
    
    ---------
    
    Co-authored-by: Claude Opus 4.7 (1M context) <[email protected]>
---
 .../data_type_timestamptz_serde.cpp                |  17 ++++
 .../data_type_serde/data_type_timestamptz_serde.h  |   4 +
 .../data_type_serde_timestamptz_test.cpp           |  83 +++++++++++++++++
 be/test/data/vec/columns/TIMESTAMPTZ(3).csv        |  16 ++++
 .../main/java/org/apache/doris/catalog/Type.java   |   6 ++
 .../variant_p0/test_variant_timestamptz_sparse.out |   7 ++
 .../test_variant_timestamptz_sparse.groovy         | 102 +++++++++++++++++++++
 7 files changed, 235 insertions(+)

diff --git a/be/src/core/data_type_serde/data_type_timestamptz_serde.cpp 
b/be/src/core/data_type_serde/data_type_timestamptz_serde.cpp
index 3727faa6bee..c22c65dbd5a 100644
--- a/be/src/core/data_type_serde/data_type_timestamptz_serde.cpp
+++ b/be/src/core/data_type_serde/data_type_timestamptz_serde.cpp
@@ -250,4 +250,21 @@ std::string DataTypeTimeStampTzSerDe::to_olap_string(const 
Field& field) const {
     return CastToString::from_timestamptz(field.get<TYPE_TIMESTAMPTZ>(), 6);
 }
 
+void DataTypeTimeStampTzSerDe::write_one_cell_to_binary(const IColumn& 
src_column,
+                                                        ColumnString::Chars& 
chars,
+                                                        int64_t row_num) const 
{
+    const auto type = 
static_cast<uint8_t>(FieldType::OLAP_FIELD_TYPE_TIMESTAMPTZ);
+    const auto& data_ref = assert_cast<const 
ColumnTimeStampTz&>(src_column).get_data_at(row_num);
+    const auto sc = static_cast<uint8_t>(_scale);
+
+    const size_t old_size = chars.size();
+    const size_t new_size = old_size + sizeof(uint8_t) + sizeof(uint8_t) + 
data_ref.size;
+    chars.resize(new_size);
+    memcpy(chars.data() + old_size, reinterpret_cast<const char*>(&type), 
sizeof(uint8_t));
+    memcpy(chars.data() + old_size + sizeof(uint8_t), reinterpret_cast<const 
char*>(&sc),
+           sizeof(uint8_t));
+    memcpy(chars.data() + old_size + sizeof(uint8_t) + sizeof(uint8_t), 
data_ref.data,
+           data_ref.size);
+}
+
 } // namespace doris
diff --git a/be/src/core/data_type_serde/data_type_timestamptz_serde.h 
b/be/src/core/data_type_serde/data_type_timestamptz_serde.h
index 459003e040f..0a595935d8f 100644
--- a/be/src/core/data_type_serde/data_type_timestamptz_serde.h
+++ b/be/src/core/data_type_serde/data_type_timestamptz_serde.h
@@ -72,6 +72,10 @@ public:
                                int64_t start, int64_t end, Arena& arena,
                                const FormatOptions& options) const override;
 
+    // Override needed: paired reader skips a scale byte; the inherited 
number-serde writer omits it.
+    void write_one_cell_to_binary(const IColumn& src_column, 
ColumnString::Chars& chars,
+                                  int64_t row_num) const override;
+
     std::string to_olap_string(const Field& field) const override;
 
 protected:
diff --git a/be/test/core/data_type_serde/data_type_serde_timestamptz_test.cpp 
b/be/test/core/data_type_serde/data_type_serde_timestamptz_test.cpp
index 47eaf0565a5..b88601a02ad 100644
--- a/be/test/core/data_type_serde/data_type_serde_timestamptz_test.cpp
+++ b/be/test/core/data_type_serde/data_type_serde_timestamptz_test.cpp
@@ -28,10 +28,13 @@
 
 #include "core/assert_cast.h"
 #include "core/column/column.h"
+#include "core/column/column_nullable.h"
+#include "core/column/column_string.h"
 #include "core/data_type/common_data_type_serder_test.h"
 #include "core/data_type/common_data_type_test.h"
 #include "core/data_type/data_type.h"
 #include "core/data_type_serde/data_type_timestamptz_serde.h"
+#include "core/field.h"
 #include "testutil/test_util.h"
 #include "util/slice.h"
 #include "util/string_util.h"
@@ -39,9 +42,11 @@ namespace doris {
 static std::string test_data_dir;
 
 static auto serde_tz_0 = std::make_shared<DataTypeTimeStampTzSerDe>(0);
+static auto serde_tz_3 = std::make_shared<DataTypeTimeStampTzSerDe>(3);
 static auto serde_tz_6 = std::make_shared<DataTypeTimeStampTzSerDe>(6);
 
 static ColumnTimeStampTz::MutablePtr column_tz_0;
+static ColumnTimeStampTz::MutablePtr column_tz_3;
 static ColumnTimeStampTz::MutablePtr column_tz_6;
 
 class DataTypeTimeStampTzSerDeTest : public ::testing::Test {
@@ -51,6 +56,7 @@ public:
         test_data_dir = root_dir + "/be/test/data/vec/columns";
 
         column_tz_0 = ColumnTimeStampTz::create();
+        column_tz_3 = ColumnTimeStampTz::create();
         column_tz_6 = ColumnTimeStampTz::create();
 
         load_columns_data();
@@ -70,6 +76,7 @@ public:
             EXPECT_TRUE(!column->empty());
         };
         test_func(column_tz_0->get_ptr(), serde_tz_0, "TIMESTAMPTZ(0).csv");
+        test_func(column_tz_3->get_ptr(), serde_tz_3, "TIMESTAMPTZ(3).csv");
         test_func(column_tz_6->get_ptr(), serde_tz_6, "TIMESTAMPTZ(6).csv");
 
         std::cout << "loading test dataset done" << std::endl;
@@ -189,6 +196,82 @@ TEST_F(DataTypeTimeStampTzSerDeTest, serdes) {
         }
     };
     test_func(*serde_tz_0, column_tz_0);
+    test_func(*serde_tz_3, column_tz_3);
     test_func(*serde_tz_6, column_tz_6);
 }
+
+// Roundtrip via write_one_cell_to_binary + 
DataTypeSerDe::deserialize_binary_to_*
+// (the variant sparse-column path). Regression for DORIS-25915: the writer
+// previously inherited DataTypeNumberSerDe's default, which omitted the scale
+// byte, while the reader expected a scale byte — a 1-byte layout mismatch that
+// silently corrupted timestamptz values stored in variant sparse paths.
+TEST_F(DataTypeTimeStampTzSerDeTest, binary_roundtrip) {
+    auto test_func = [&](const DataTypeTimeStampTzSerDe& serde,
+                         const ColumnTimeStampTz::MutablePtr& source_column, 
uint32_t scale) {
+        const size_t row_count = source_column->size();
+        ASSERT_GT(row_count, 0);
+
+        // Per-row layout: [type:1][scale:1][value:8] = 10 bytes.
+        constexpr size_t kBytesPerRow =
+                sizeof(uint8_t) + sizeof(uint8_t) + 
sizeof(TimestampTzValue::underlying_value);
+
+        auto chars_col = ColumnString::create();
+        ColumnString::Chars& chars = chars_col->get_chars();
+        std::vector<size_t> row_offsets;
+        row_offsets.reserve(row_count + 1);
+        row_offsets.push_back(0);
+        for (size_t i = 0; i < row_count; ++i) {
+            serde.write_one_cell_to_binary(*source_column, chars, i);
+            row_offsets.push_back(chars.size());
+        }
+
+        // Layout: each row contributes exactly 10 bytes; first byte is the
+        // TIMESTAMPTZ FieldType; second is the scale.
+        ASSERT_EQ(chars.size(), row_count * kBytesPerRow);
+        for (size_t i = 0; i < row_count; ++i) {
+            ASSERT_EQ(row_offsets[i + 1] - row_offsets[i], kBytesPerRow);
+            EXPECT_EQ(chars[row_offsets[i]],
+                      
static_cast<uint8_t>(FieldType::OLAP_FIELD_TYPE_TIMESTAMPTZ));
+            EXPECT_EQ(chars[row_offsets[i] + 1], static_cast<uint8_t>(scale));
+        }
+
+        // Roundtrip via deserialize_binary_to_column (column path).
+        {
+            auto inner_col = ColumnTimeStampTz::create();
+            auto null_map = ColumnUInt8::create();
+            auto nullable_col = ColumnNullable::create(std::move(inner_col), 
std::move(null_map));
+            for (size_t i = 0; i < row_count; ++i) {
+                const uint8_t* start = chars.data() + row_offsets[i];
+                const uint8_t* end =
+                        DataTypeSerDe::deserialize_binary_to_column(start, 
*nullable_col);
+                EXPECT_EQ(end - start, kBytesPerRow);
+            }
+            const auto& read_col =
+                    assert_cast<const 
ColumnTimeStampTz&>(nullable_col->get_nested_column());
+            for (size_t i = 0; i < row_count; ++i) {
+                EXPECT_EQ(read_col.get_element(i), 
source_column->get_element(i))
+                        << "row " << i << " mismatched after binary roundtrip";
+            }
+        }
+
+        // Roundtrip via deserialize_binary_to_field (field path); scale
+        // should propagate into FieldInfo.
+        {
+            for (size_t i = 0; i < row_count; ++i) {
+                const uint8_t* start = chars.data() + row_offsets[i];
+                Field f;
+                FieldInfo info;
+                const uint8_t* end = 
DataTypeSerDe::deserialize_binary_to_field(start, f, info);
+                EXPECT_EQ(end - start, kBytesPerRow);
+                EXPECT_EQ(info.scale, static_cast<int>(scale));
+                EXPECT_EQ(f.get<TYPE_TIMESTAMPTZ>(), 
source_column->get_element(i))
+                        << "row " << i << " mismatched after field roundtrip";
+            }
+        }
+    };
+
+    test_func(*serde_tz_0, column_tz_0, 0);
+    test_func(*serde_tz_3, column_tz_3, 3);
+    test_func(*serde_tz_6, column_tz_6, 6);
+}
 } // namespace doris
diff --git a/be/test/data/vec/columns/TIMESTAMPTZ(3).csv 
b/be/test/data/vec/columns/TIMESTAMPTZ(3).csv
new file mode 100644
index 00000000000..078e9f15e52
--- /dev/null
+++ b/be/test/data/vec/columns/TIMESTAMPTZ(3).csv
@@ -0,0 +1,16 @@
+NULL
+0000-01-01 00:00:00 +00:00
+0000-01-01 00:00:00.000 +00:00
+0000-01-01 00:00:00.001 +00:00
+0000-01-01 00:00:00.123 +00:00
+0000-01-01 00:00:00.999 +00:00
+2023-08-08 20:20:20 +00:00
+2023-08-08 20:20:20.000 +00:00
+2023-08-08 20:20:20.001 +00:00
+2023-08-08 20:20:20.123 +00:00
+2023-08-08 20:20:20.999 +00:00
+9999-12-31 23:59:59 +08:00
+9999-12-31 23:59:59.000 +08:00
+9999-12-31 23:59:59.001 +08:00
+9999-12-31 23:59:59.123 +08:00
+9999-12-31 23:59:59.999 +08:00
diff --git a/fe/fe-type/src/main/java/org/apache/doris/catalog/Type.java 
b/fe/fe-type/src/main/java/org/apache/doris/catalog/Type.java
index 8a544cade38..ce1c46b17ec 100644
--- a/fe/fe-type/src/main/java/org/apache/doris/catalog/Type.java
+++ b/fe/fe-type/src/main/java/org/apache/doris/catalog/Type.java
@@ -268,6 +268,12 @@ public abstract class Type {
         structSubTypes.add(MAP);
         structSubTypes.add(STRUCT);
 
+        // Before adding a type here, the BE side must implement these three 
serde
+        // adapters for the type so values falling to the variant sparse path 
can
+        // round-trip correctly. Omitting any of them silently corrupts sparse 
reads.
+        //   * DataTypeXxxSerDe::write_one_cell_to_binary
+        //   * DataTypeXxxSerDe::deserialize_binary_to_field
+        //   * DataTypeXxxSerDe::deserialize_binary_to_column
         variantSubTypes = Lists.newArrayList();
         variantSubTypes.add(BOOLEAN);
         variantSubTypes.addAll(integerTypes);
diff --git 
a/regression-test/data/variant_p0/test_variant_timestamptz_sparse.out 
b/regression-test/data/variant_p0/test_variant_timestamptz_sparse.out
new file mode 100644
index 00000000000..0ad3d02aa15
--- /dev/null
+++ b/regression-test/data/variant_p0/test_variant_timestamptz_sparse.out
@@ -0,0 +1,7 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !select_all --
+1      2003-12-21 10:29:00+08:00       2003-12-21 10:29:00+08:00       
2003-12-21 10:29:00.000+08:00   2003-12-21 15:29:00.000+08:00   2003-12-21 
15:29:00.000000+08:00        2003-12-21 07:29:00.000000+08:00
+
+-- !select_cast_ts0 --
+1      2003-12-21 10:29:00+08:00       2003-12-21 10:29:00.000+08:00   
2003-12-21 15:29:00.000000+08:00
+
diff --git 
a/regression-test/suites/variant_p0/test_variant_timestamptz_sparse.groovy 
b/regression-test/suites/variant_p0/test_variant_timestamptz_sparse.groovy
new file mode 100644
index 00000000000..9163ea23337
--- /dev/null
+++ b/regression-test/suites/variant_p0/test_variant_timestamptz_sparse.groovy
@@ -0,0 +1,102 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Reproduces DORIS-25915:
+//   When VARIANT has typed paths that exceed variant_max_subcolumns_count and
+//   variant_enable_typed_paths_to_sparse=true, timestamptz values that fall to
+//   the sparse path are read back as only the timezone suffix (e.g. "+00:00")
+//   instead of the full timestamp.
+suite("test_variant_timestamptz_sparse", "p0"){
+    sql " set time_zone = '+08:00' "
+    sql " set default_variant_enable_doc_mode = false "
+
+    sql "DROP TABLE IF EXISTS test_variant_timestamptz_sparse_repro"
+
+    sql """
+        CREATE TABLE test_variant_timestamptz_sparse_repro (
+          pk int,
+          var VARIANT<
+            'c1':bigint,
+            'c2':bigint,
+            'd1':date,
+            'd2':date,
+            'dt0':datetime(0),
+            'dt0n':datetime(0),
+            'dt3':datetime(3),
+            'dt3n':datetime(3),
+            'dt6':datetime(6),
+            'dt6n':datetime(6),
+            'ts0':timestamptz(0),
+            'ts0n':timestamptz(0),
+            'ts3':timestamptz(3),
+            'ts3n':timestamptz(3),
+            'ts6':timestamptz(6),
+            'ts6n':timestamptz(6),
+            'v1':string,
+            'v2':string,
+            properties("variant_max_subcolumns_count" = 
"2","variant_enable_typed_paths_to_sparse" = "true")
+          > NULL,
+          INDEX c1 (`var`) USING INVERTED PROPERTIES("field_pattern" = "c1"),
+          INDEX c2 (`var`) USING INVERTED PROPERTIES("field_pattern" = "c2"),
+          INDEX d1 (`var`) USING INVERTED PROPERTIES("field_pattern" = "d1"),
+          INDEX d2 (`var`) USING INVERTED PROPERTIES("field_pattern" = "d2"),
+          INDEX dt0 (`var`) USING INVERTED PROPERTIES("field_pattern" = "dt0"),
+          INDEX dt0n (`var`) USING INVERTED PROPERTIES("field_pattern" = 
"dt0n"),
+          INDEX dt3 (`var`) USING INVERTED PROPERTIES("field_pattern" = "dt3"),
+          INDEX dt3n (`var`) USING INVERTED PROPERTIES("field_pattern" = 
"dt3n"),
+          INDEX dt6 (`var`) USING INVERTED PROPERTIES("field_pattern" = "dt6"),
+          INDEX dt6n (`var`) USING INVERTED PROPERTIES("field_pattern" = 
"dt6n"),
+          INDEX v1 (`var`) USING INVERTED PROPERTIES("field_pattern" = "v1"),
+          INDEX v2 (`var`) USING INVERTED PROPERTIES("field_pattern" = "v2")
+        ) ENGINE=OLAP
+        DUPLICATE KEY(pk)
+        DISTRIBUTED BY HASH(pk) BUCKETS 1
+        PROPERTIES("replication_num" = "1");
+    """
+
+    sql """
+        INSERT INTO test_variant_timestamptz_sparse_repro VALUES
+        (1, 
'{"c1":1699516324,"c2":1690122421,"d1":"2024-02-25","d2":"2024-11-27","dt0":"2002-12-25
 12:42:19","dt0n":"2015-02-05 02:36:13","dt3":"2015-02-22 
02:09:01","dt3n":"2015-09-16 02:55:07","dt6":"2001-09-19 
09:53:52","dt6n":"2003-12-21 02:29:00","ts0":"2003-12-21 02:29:00 
+00:00","ts0n":"2003-12-21 02:29:00 +00:00","ts3":"2003-12-21 02:29:00 
+00:00","ts3n":"2003-12-21 02:29:00 -05:00","ts6":"2003-12-21 02:29:00 
-05:00","ts6n":"2003-12-21 02:29:00 +03:00","v1":"2024-12-10 10:16:19" [...]
+    """
+
+    sql "SYNC"
+
+    // Pre-fix: every column that fell to sparse returned just the timezone
+    // suffix ("+08:00"). Post-fix: each ts* path round-trips as a full 
timestamp.
+    qt_select_all """
+        SELECT
+          pk,
+          CAST(var['ts0']  AS string) AS str_ts0,
+          CAST(var['ts0n'] AS string) AS str_ts0n,
+          CAST(var['ts3']  AS string) AS str_ts3,
+          CAST(var['ts3n'] AS string) AS str_ts3n,
+          CAST(var['ts6']  AS string) AS str_ts6,
+          CAST(var['ts6n'] AS string) AS str_ts6n
+        FROM test_variant_timestamptz_sparse_repro
+        ORDER BY pk;
+    """
+
+    qt_select_cast_ts0 """
+        SELECT
+          pk,
+          CAST(var['ts0'] AS timestamptz(0)) AS cast_ts0,
+          CAST(var['ts3'] AS timestamptz(3)) AS cast_ts3,
+          CAST(var['ts6'] AS timestamptz(6)) AS cast_ts6
+        FROM test_variant_timestamptz_sparse_repro
+        ORDER BY pk;
+    """
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to