This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 2e72603618c [fix](variant) preserve TIMESTAMPTZ values in sparse path
(#63522)
2e72603618c is described below
commit 2e72603618ca5ce998184bcc95285bf30405f5e0
Author: Chenyang Sun <[email protected]>
AuthorDate: Wed May 27 20:51:39 2026 +0800
[fix](variant) preserve TIMESTAMPTZ values in sparse path (#63522)
Add the missing write_one_cell_to_binary override mirroring
DataTypeDateTimeV2SerDe so the writer also emits the scale byte. Reader
is already correct.
### What problem does this PR solve?
Issue Number: close #xxx
Related PR: #xxx
Problem Summary:
### Release note
None
### Check List (For Author)
- Test <!-- At least one of them must be included. -->
- [x] Regression test
- [x] Unit Test
- [ ] Manual test (add detailed scripts or steps below)
- [ ] No need to test or manual test. Explain why:
- [ ] This is a refactor/code format and no logic has been changed.
- [ ] Previous test can cover this change.
- [ ] No code files have been changed.
- [ ] Other reason <!-- Add your reason? -->
- Behavior changed:
- [ ] No.
- [ ] Yes. <!-- Explain the behavior change -->
- Does this need documentation?
- [ ] No.
- [ ] Yes. <!-- Add document PR link here. eg:
https://github.com/apache/doris-website/pull/1214 -->
### Check List (For Reviewer who merge this PR)
- [ ] Confirm the release note
- [ ] Confirm test cases
- [ ] Confirm document
- [ ] Add branch pick label <!-- Add branch pick label that this PR
should merge into -->
---------
Co-authored-by: Claude Opus 4.7 (1M context) <[email protected]>
---
.../data_type_timestamptz_serde.cpp | 17 ++++
.../data_type_serde/data_type_timestamptz_serde.h | 4 +
.../data_type_serde_timestamptz_test.cpp | 83 +++++++++++++++++
be/test/data/vec/columns/TIMESTAMPTZ(3).csv | 16 ++++
.../main/java/org/apache/doris/catalog/Type.java | 6 ++
.../variant_p0/test_variant_timestamptz_sparse.out | 7 ++
.../test_variant_timestamptz_sparse.groovy | 102 +++++++++++++++++++++
7 files changed, 235 insertions(+)
diff --git a/be/src/core/data_type_serde/data_type_timestamptz_serde.cpp
b/be/src/core/data_type_serde/data_type_timestamptz_serde.cpp
index 3727faa6bee..c22c65dbd5a 100644
--- a/be/src/core/data_type_serde/data_type_timestamptz_serde.cpp
+++ b/be/src/core/data_type_serde/data_type_timestamptz_serde.cpp
@@ -250,4 +250,21 @@ std::string DataTypeTimeStampTzSerDe::to_olap_string(const
Field& field) const {
return CastToString::from_timestamptz(field.get<TYPE_TIMESTAMPTZ>(), 6);
}
+void DataTypeTimeStampTzSerDe::write_one_cell_to_binary(const IColumn&
src_column,
+ ColumnString::Chars&
chars,
+ int64_t row_num) const
{
+ const auto type =
static_cast<uint8_t>(FieldType::OLAP_FIELD_TYPE_TIMESTAMPTZ);
+ const auto& data_ref = assert_cast<const
ColumnTimeStampTz&>(src_column).get_data_at(row_num);
+ const auto sc = static_cast<uint8_t>(_scale);
+
+ const size_t old_size = chars.size();
+ const size_t new_size = old_size + sizeof(uint8_t) + sizeof(uint8_t) +
data_ref.size;
+ chars.resize(new_size);
+ memcpy(chars.data() + old_size, reinterpret_cast<const char*>(&type),
sizeof(uint8_t));
+ memcpy(chars.data() + old_size + sizeof(uint8_t), reinterpret_cast<const
char*>(&sc),
+ sizeof(uint8_t));
+ memcpy(chars.data() + old_size + sizeof(uint8_t) + sizeof(uint8_t),
data_ref.data,
+ data_ref.size);
+}
+
} // namespace doris
diff --git a/be/src/core/data_type_serde/data_type_timestamptz_serde.h
b/be/src/core/data_type_serde/data_type_timestamptz_serde.h
index 459003e040f..0a595935d8f 100644
--- a/be/src/core/data_type_serde/data_type_timestamptz_serde.h
+++ b/be/src/core/data_type_serde/data_type_timestamptz_serde.h
@@ -72,6 +72,10 @@ public:
int64_t start, int64_t end, Arena& arena,
const FormatOptions& options) const override;
+ // Override needed: paired reader skips a scale byte; the inherited
number-serde writer omits it.
+ void write_one_cell_to_binary(const IColumn& src_column,
ColumnString::Chars& chars,
+ int64_t row_num) const override;
+
std::string to_olap_string(const Field& field) const override;
protected:
diff --git a/be/test/core/data_type_serde/data_type_serde_timestamptz_test.cpp
b/be/test/core/data_type_serde/data_type_serde_timestamptz_test.cpp
index 47eaf0565a5..b88601a02ad 100644
--- a/be/test/core/data_type_serde/data_type_serde_timestamptz_test.cpp
+++ b/be/test/core/data_type_serde/data_type_serde_timestamptz_test.cpp
@@ -28,10 +28,13 @@
#include "core/assert_cast.h"
#include "core/column/column.h"
+#include "core/column/column_nullable.h"
+#include "core/column/column_string.h"
#include "core/data_type/common_data_type_serder_test.h"
#include "core/data_type/common_data_type_test.h"
#include "core/data_type/data_type.h"
#include "core/data_type_serde/data_type_timestamptz_serde.h"
+#include "core/field.h"
#include "testutil/test_util.h"
#include "util/slice.h"
#include "util/string_util.h"
@@ -39,9 +42,11 @@ namespace doris {
static std::string test_data_dir;
static auto serde_tz_0 = std::make_shared<DataTypeTimeStampTzSerDe>(0);
+static auto serde_tz_3 = std::make_shared<DataTypeTimeStampTzSerDe>(3);
static auto serde_tz_6 = std::make_shared<DataTypeTimeStampTzSerDe>(6);
static ColumnTimeStampTz::MutablePtr column_tz_0;
+static ColumnTimeStampTz::MutablePtr column_tz_3;
static ColumnTimeStampTz::MutablePtr column_tz_6;
class DataTypeTimeStampTzSerDeTest : public ::testing::Test {
@@ -51,6 +56,7 @@ public:
test_data_dir = root_dir + "/be/test/data/vec/columns";
column_tz_0 = ColumnTimeStampTz::create();
+ column_tz_3 = ColumnTimeStampTz::create();
column_tz_6 = ColumnTimeStampTz::create();
load_columns_data();
@@ -70,6 +76,7 @@ public:
EXPECT_TRUE(!column->empty());
};
test_func(column_tz_0->get_ptr(), serde_tz_0, "TIMESTAMPTZ(0).csv");
+ test_func(column_tz_3->get_ptr(), serde_tz_3, "TIMESTAMPTZ(3).csv");
test_func(column_tz_6->get_ptr(), serde_tz_6, "TIMESTAMPTZ(6).csv");
std::cout << "loading test dataset done" << std::endl;
@@ -189,6 +196,82 @@ TEST_F(DataTypeTimeStampTzSerDeTest, serdes) {
}
};
test_func(*serde_tz_0, column_tz_0);
+ test_func(*serde_tz_3, column_tz_3);
test_func(*serde_tz_6, column_tz_6);
}
+
+// Roundtrip via write_one_cell_to_binary +
DataTypeSerDe::deserialize_binary_to_*
+// (the variant sparse-column path). Regression for DORIS-25915: the writer
+// previously inherited DataTypeNumberSerDe's default, which omitted the scale
+// byte, while the reader expected a scale byte — a 1-byte layout mismatch that
+// silently corrupted timestamptz values stored in variant sparse paths.
+TEST_F(DataTypeTimeStampTzSerDeTest, binary_roundtrip) {
+ auto test_func = [&](const DataTypeTimeStampTzSerDe& serde,
+ const ColumnTimeStampTz::MutablePtr& source_column,
uint32_t scale) {
+ const size_t row_count = source_column->size();
+ ASSERT_GT(row_count, 0);
+
+ // Per-row layout: [type:1][scale:1][value:8] = 10 bytes.
+ constexpr size_t kBytesPerRow =
+ sizeof(uint8_t) + sizeof(uint8_t) +
sizeof(TimestampTzValue::underlying_value);
+
+ auto chars_col = ColumnString::create();
+ ColumnString::Chars& chars = chars_col->get_chars();
+ std::vector<size_t> row_offsets;
+ row_offsets.reserve(row_count + 1);
+ row_offsets.push_back(0);
+ for (size_t i = 0; i < row_count; ++i) {
+ serde.write_one_cell_to_binary(*source_column, chars, i);
+ row_offsets.push_back(chars.size());
+ }
+
+ // Layout: each row contributes exactly 10 bytes; first byte is the
+ // TIMESTAMPTZ FieldType; second is the scale.
+ ASSERT_EQ(chars.size(), row_count * kBytesPerRow);
+ for (size_t i = 0; i < row_count; ++i) {
+ ASSERT_EQ(row_offsets[i + 1] - row_offsets[i], kBytesPerRow);
+ EXPECT_EQ(chars[row_offsets[i]],
+
static_cast<uint8_t>(FieldType::OLAP_FIELD_TYPE_TIMESTAMPTZ));
+ EXPECT_EQ(chars[row_offsets[i] + 1], static_cast<uint8_t>(scale));
+ }
+
+ // Roundtrip via deserialize_binary_to_column (column path).
+ {
+ auto inner_col = ColumnTimeStampTz::create();
+ auto null_map = ColumnUInt8::create();
+ auto nullable_col = ColumnNullable::create(std::move(inner_col),
std::move(null_map));
+ for (size_t i = 0; i < row_count; ++i) {
+ const uint8_t* start = chars.data() + row_offsets[i];
+ const uint8_t* end =
+ DataTypeSerDe::deserialize_binary_to_column(start,
*nullable_col);
+ EXPECT_EQ(end - start, kBytesPerRow);
+ }
+ const auto& read_col =
+ assert_cast<const
ColumnTimeStampTz&>(nullable_col->get_nested_column());
+ for (size_t i = 0; i < row_count; ++i) {
+ EXPECT_EQ(read_col.get_element(i),
source_column->get_element(i))
+ << "row " << i << " mismatched after binary roundtrip";
+ }
+ }
+
+ // Roundtrip via deserialize_binary_to_field (field path); scale
+ // should propagate into FieldInfo.
+ {
+ for (size_t i = 0; i < row_count; ++i) {
+ const uint8_t* start = chars.data() + row_offsets[i];
+ Field f;
+ FieldInfo info;
+ const uint8_t* end =
DataTypeSerDe::deserialize_binary_to_field(start, f, info);
+ EXPECT_EQ(end - start, kBytesPerRow);
+ EXPECT_EQ(info.scale, static_cast<int>(scale));
+ EXPECT_EQ(f.get<TYPE_TIMESTAMPTZ>(),
source_column->get_element(i))
+ << "row " << i << " mismatched after field roundtrip";
+ }
+ }
+ };
+
+ test_func(*serde_tz_0, column_tz_0, 0);
+ test_func(*serde_tz_3, column_tz_3, 3);
+ test_func(*serde_tz_6, column_tz_6, 6);
+}
} // namespace doris
diff --git a/be/test/data/vec/columns/TIMESTAMPTZ(3).csv
b/be/test/data/vec/columns/TIMESTAMPTZ(3).csv
new file mode 100644
index 00000000000..078e9f15e52
--- /dev/null
+++ b/be/test/data/vec/columns/TIMESTAMPTZ(3).csv
@@ -0,0 +1,16 @@
+NULL
+0000-01-01 00:00:00 +00:00
+0000-01-01 00:00:00.000 +00:00
+0000-01-01 00:00:00.001 +00:00
+0000-01-01 00:00:00.123 +00:00
+0000-01-01 00:00:00.999 +00:00
+2023-08-08 20:20:20 +00:00
+2023-08-08 20:20:20.000 +00:00
+2023-08-08 20:20:20.001 +00:00
+2023-08-08 20:20:20.123 +00:00
+2023-08-08 20:20:20.999 +00:00
+9999-12-31 23:59:59 +08:00
+9999-12-31 23:59:59.000 +08:00
+9999-12-31 23:59:59.001 +08:00
+9999-12-31 23:59:59.123 +08:00
+9999-12-31 23:59:59.999 +08:00
diff --git a/fe/fe-type/src/main/java/org/apache/doris/catalog/Type.java
b/fe/fe-type/src/main/java/org/apache/doris/catalog/Type.java
index 8a544cade38..ce1c46b17ec 100644
--- a/fe/fe-type/src/main/java/org/apache/doris/catalog/Type.java
+++ b/fe/fe-type/src/main/java/org/apache/doris/catalog/Type.java
@@ -268,6 +268,12 @@ public abstract class Type {
structSubTypes.add(MAP);
structSubTypes.add(STRUCT);
+ // Before adding a type here, the BE side must implement these three
serde
+ // adapters for the type so values falling to the variant sparse path
can
+ // round-trip correctly. Omitting any of them silently corrupts sparse
reads.
+ // * DataTypeXxxSerDe::write_one_cell_to_binary
+ // * DataTypeXxxSerDe::deserialize_binary_to_field
+ // * DataTypeXxxSerDe::deserialize_binary_to_column
variantSubTypes = Lists.newArrayList();
variantSubTypes.add(BOOLEAN);
variantSubTypes.addAll(integerTypes);
diff --git
a/regression-test/data/variant_p0/test_variant_timestamptz_sparse.out
b/regression-test/data/variant_p0/test_variant_timestamptz_sparse.out
new file mode 100644
index 00000000000..0ad3d02aa15
--- /dev/null
+++ b/regression-test/data/variant_p0/test_variant_timestamptz_sparse.out
@@ -0,0 +1,7 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !select_all --
+1 2003-12-21 10:29:00+08:00 2003-12-21 10:29:00+08:00
2003-12-21 10:29:00.000+08:00 2003-12-21 15:29:00.000+08:00 2003-12-21
15:29:00.000000+08:00 2003-12-21 07:29:00.000000+08:00
+
+-- !select_cast_ts0 --
+1 2003-12-21 10:29:00+08:00 2003-12-21 10:29:00.000+08:00
2003-12-21 15:29:00.000000+08:00
+
diff --git
a/regression-test/suites/variant_p0/test_variant_timestamptz_sparse.groovy
b/regression-test/suites/variant_p0/test_variant_timestamptz_sparse.groovy
new file mode 100644
index 00000000000..9163ea23337
--- /dev/null
+++ b/regression-test/suites/variant_p0/test_variant_timestamptz_sparse.groovy
@@ -0,0 +1,102 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Reproduces DORIS-25915:
+// When VARIANT has typed paths that exceed variant_max_subcolumns_count and
+// variant_enable_typed_paths_to_sparse=true, timestamptz values that fall to
+// the sparse path are read back as only the timezone suffix (e.g. "+00:00")
+// instead of the full timestamp.
+suite("test_variant_timestamptz_sparse", "p0"){
+ sql " set time_zone = '+08:00' "
+ sql " set default_variant_enable_doc_mode = false "
+
+ sql "DROP TABLE IF EXISTS test_variant_timestamptz_sparse_repro"
+
+ sql """
+ CREATE TABLE test_variant_timestamptz_sparse_repro (
+ pk int,
+ var VARIANT<
+ 'c1':bigint,
+ 'c2':bigint,
+ 'd1':date,
+ 'd2':date,
+ 'dt0':datetime(0),
+ 'dt0n':datetime(0),
+ 'dt3':datetime(3),
+ 'dt3n':datetime(3),
+ 'dt6':datetime(6),
+ 'dt6n':datetime(6),
+ 'ts0':timestamptz(0),
+ 'ts0n':timestamptz(0),
+ 'ts3':timestamptz(3),
+ 'ts3n':timestamptz(3),
+ 'ts6':timestamptz(6),
+ 'ts6n':timestamptz(6),
+ 'v1':string,
+ 'v2':string,
+ properties("variant_max_subcolumns_count" =
"2","variant_enable_typed_paths_to_sparse" = "true")
+ > NULL,
+ INDEX c1 (`var`) USING INVERTED PROPERTIES("field_pattern" = "c1"),
+ INDEX c2 (`var`) USING INVERTED PROPERTIES("field_pattern" = "c2"),
+ INDEX d1 (`var`) USING INVERTED PROPERTIES("field_pattern" = "d1"),
+ INDEX d2 (`var`) USING INVERTED PROPERTIES("field_pattern" = "d2"),
+ INDEX dt0 (`var`) USING INVERTED PROPERTIES("field_pattern" = "dt0"),
+ INDEX dt0n (`var`) USING INVERTED PROPERTIES("field_pattern" =
"dt0n"),
+ INDEX dt3 (`var`) USING INVERTED PROPERTIES("field_pattern" = "dt3"),
+ INDEX dt3n (`var`) USING INVERTED PROPERTIES("field_pattern" =
"dt3n"),
+ INDEX dt6 (`var`) USING INVERTED PROPERTIES("field_pattern" = "dt6"),
+ INDEX dt6n (`var`) USING INVERTED PROPERTIES("field_pattern" =
"dt6n"),
+ INDEX v1 (`var`) USING INVERTED PROPERTIES("field_pattern" = "v1"),
+ INDEX v2 (`var`) USING INVERTED PROPERTIES("field_pattern" = "v2")
+ ) ENGINE=OLAP
+ DUPLICATE KEY(pk)
+ DISTRIBUTED BY HASH(pk) BUCKETS 1
+ PROPERTIES("replication_num" = "1");
+ """
+
+ sql """
+ INSERT INTO test_variant_timestamptz_sparse_repro VALUES
+ (1,
'{"c1":1699516324,"c2":1690122421,"d1":"2024-02-25","d2":"2024-11-27","dt0":"2002-12-25
12:42:19","dt0n":"2015-02-05 02:36:13","dt3":"2015-02-22
02:09:01","dt3n":"2015-09-16 02:55:07","dt6":"2001-09-19
09:53:52","dt6n":"2003-12-21 02:29:00","ts0":"2003-12-21 02:29:00
+00:00","ts0n":"2003-12-21 02:29:00 +00:00","ts3":"2003-12-21 02:29:00
+00:00","ts3n":"2003-12-21 02:29:00 -05:00","ts6":"2003-12-21 02:29:00
-05:00","ts6n":"2003-12-21 02:29:00 +03:00","v1":"2024-12-10 10:16:19" [...]
+ """
+
+ sql "SYNC"
+
+ // Pre-fix: every column that fell to sparse returned just the timezone
+ // suffix ("+08:00"). Post-fix: each ts* path round-trips as a full
timestamp.
+ qt_select_all """
+ SELECT
+ pk,
+ CAST(var['ts0'] AS string) AS str_ts0,
+ CAST(var['ts0n'] AS string) AS str_ts0n,
+ CAST(var['ts3'] AS string) AS str_ts3,
+ CAST(var['ts3n'] AS string) AS str_ts3n,
+ CAST(var['ts6'] AS string) AS str_ts6,
+ CAST(var['ts6n'] AS string) AS str_ts6n
+ FROM test_variant_timestamptz_sparse_repro
+ ORDER BY pk;
+ """
+
+ qt_select_cast_ts0 """
+ SELECT
+ pk,
+ CAST(var['ts0'] AS timestamptz(0)) AS cast_ts0,
+ CAST(var['ts3'] AS timestamptz(3)) AS cast_ts3,
+ CAST(var['ts6'] AS timestamptz(6)) AS cast_ts6
+ FROM test_variant_timestamptz_sparse_repro
+ ORDER BY pk;
+ """
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]