This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.1 by this push:
new e4d7b7a8bf3 branch-4.1: [fix](be) Handle legacy DecimalV2 segments
with missing precision/frac #63569 (#63749)
e4d7b7a8bf3 is described below
commit e4d7b7a8bf30d693f10c2485b9339532f7442ecf
Author: github-actions[bot]
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Thu May 28 13:38:28 2026 +0800
branch-4.1: [fix](be) Handle legacy DecimalV2 segments with missing
precision/frac #63569 (#63749)
Cherry-picked from #63569
Co-authored-by: TengJianPing <[email protected]>
Co-authored-by: Copilot <[email protected]>
---
be/src/core/data_type/data_type_factory.cpp | 19 ++-
be/test/core/data_type/data_type_decimal_test.cpp | 179 ++++++++++++++++++++++
2 files changed, 196 insertions(+), 2 deletions(-)
diff --git a/be/src/core/data_type/data_type_factory.cpp
b/be/src/core/data_type/data_type_factory.cpp
index 711ed07330f..755ef5a25ff 100644
--- a/be/src/core/data_type/data_type_factory.cpp
+++ b/be/src/core/data_type/data_type_factory.cpp
@@ -385,8 +385,23 @@ DataTypePtr DataTypeFactory::create_data_type(const
segment_v2::ColumnMetaPB& pc
nested = std::make_shared<DataTypeStruct>(dataTypes, names);
} else {
// TODO add precision and frac
- nested =
_create_primitive_data_type(static_cast<FieldType>(pcolumn.type()),
- pcolumn.precision(),
pcolumn.frac(), -1);
+ auto meta_precision = pcolumn.precision();
+ auto meta_scale = pcolumn.frac();
+ if (pcolumn.type() ==
static_cast<int>(FieldType::OLAP_FIELD_TYPE_DECIMAL)) {
+ // Segments written by Doris < 2.1.0 (before #26572) do not persist
+ // precision/frac in ColumnMetaPB, so they default to 0 when read
back.
+ // Pass UINT32_MAX to DataTypeDecimalV2 to signal that the original
+ // precision/scale are unknown; otherwise check_type_precision(0)
throws
+ // "meet invalid precision: real_precision=0".
+ UInt32 orig_precision =
+ meta_precision > 0 ? static_cast<UInt32>(meta_precision) :
UINT32_MAX;
+ UInt32 orig_scale = meta_precision > 0 ?
static_cast<UInt32>(meta_scale) : UINT32_MAX;
+ nested =
_create_primitive_data_type(static_cast<FieldType>(pcolumn.type()),
+ orig_precision, orig_scale,
-1);
+ } else {
+ nested =
_create_primitive_data_type(static_cast<FieldType>(pcolumn.type()),
+ meta_precision, meta_scale,
-1);
+ }
}
if (pcolumn.is_nullable() && nested) {
diff --git a/be/test/core/data_type/data_type_decimal_test.cpp
b/be/test/core/data_type/data_type_decimal_test.cpp
index 09a1a2da052..3240d1e795a 100644
--- a/be/test/core/data_type/data_type_decimal_test.cpp
+++ b/be/test/core/data_type/data_type_decimal_test.cpp
@@ -28,16 +28,24 @@
#include <limits>
#include <type_traits>
+#include "agent/be_exec_version_manager.h"
#include "common/exception.h"
#include "core/assert_cast.h"
#include "core/column/column.h"
#include "core/data_type/common_data_type_serder_test.h"
#include "core/data_type/common_data_type_test.h"
#include "core/data_type/data_type.h"
+#include "core/data_type/data_type_agg_state.h"
+#include "core/data_type/data_type_array.h"
#include "core/data_type/data_type_factory.hpp"
+#include "core/data_type/data_type_map.h"
+#include "core/data_type/data_type_nullable.h"
#include "core/data_type/data_type_number.h"
+#include "core/data_type/data_type_struct.h"
#include "core/field.h"
#include "core/types.h"
+#include "gen_cpp/segment_v2.pb.h"
+#include "storage/tablet/tablet_schema.h"
#include "testutil/test_util.h"
namespace doris {
@@ -474,6 +482,177 @@ TEST_F(DataTypeDecimalTest, ser_deser) {
test_func(dt_decimal256_3, *column_decimal256_3, USE_CONST_SERDE);
test_func(dt_decimal256_3, *column_decimal256_3, AGGREGATION_2_1_VERSION);
}
+// Regression for legacy DecimalV2 segments written by Doris < 2.1.0 (before
+// #26572): ColumnMetaPB.precision/frac are absent and default to 0 when read
+// back. DataTypeFactory must not pass 0 as the original precision/scale to
+// DataTypeDecimalV2, otherwise check_type_precision throws
+// "meet invalid precision: real_precision=0".
+TEST_F(DataTypeDecimalTest, create_decimalv2_from_legacy_tablet_column) {
+ // Case 1: legacy segment — precision/frac missing (default 0)
+ {
+ TabletColumn col;
+ col.set_type(FieldType::OLAP_FIELD_TYPE_DECIMAL);
+ // intentionally do not set precision/frac to mimic old segments
+ DataTypePtr dt;
+ EXPECT_NO_THROW(dt = DataTypeFactory::instance().create_data_type(col,
false));
+ ASSERT_NE(dt, nullptr);
+ EXPECT_EQ(dt->get_primitive_type(), TYPE_DECIMALV2);
+ const auto* decv2 = assert_cast<const DataTypeDecimalV2*>(dt.get());
+ EXPECT_EQ(decv2->get_precision(), BeConsts::MAX_DECIMALV2_PRECISION);
+ EXPECT_EQ(decv2->get_scale(), BeConsts::MAX_DECIMALV2_SCALE);
+ // When original precision/scale are unknown, get_original_* should
fall
+ // back to the in-memory (27, 9) representation.
+ EXPECT_EQ(decv2->get_original_precision(),
BeConsts::MAX_DECIMALV2_PRECISION);
+ EXPECT_EQ(decv2->get_original_scale(), BeConsts::MAX_DECIMALV2_SCALE);
+ }
+ // Case 2: new segment — precision/frac are persisted (e.g. decimal(26,6))
+ {
+ TabletColumn col;
+ col.set_type(FieldType::OLAP_FIELD_TYPE_DECIMAL);
+ col.set_precision(26);
+ col.set_frac(6);
+ DataTypePtr dt;
+ EXPECT_NO_THROW(dt = DataTypeFactory::instance().create_data_type(col,
false));
+ ASSERT_NE(dt, nullptr);
+ EXPECT_EQ(dt->get_primitive_type(), TYPE_DECIMALV2);
+ const auto* decv2 = assert_cast<const DataTypeDecimalV2*>(dt.get());
+ EXPECT_EQ(decv2->get_precision(), BeConsts::MAX_DECIMALV2_PRECISION);
+ EXPECT_EQ(decv2->get_scale(), BeConsts::MAX_DECIMALV2_SCALE);
+ EXPECT_EQ(decv2->get_original_precision(), 26U);
+ EXPECT_EQ(decv2->get_original_scale(), 6U);
+ }
+ // Case 3: same regression via the segment-read path that consumes
+ // ColumnMetaPB directly (this is the exact code path that broke after
+ // PR #35222 when reading old DecimalV2 segments).
+ {
+ segment_v2::ColumnMetaPB meta;
+ meta.set_type(static_cast<int>(FieldType::OLAP_FIELD_TYPE_DECIMAL));
+ // precision/frac intentionally unset -> default 0
+ DataTypePtr dt;
+ EXPECT_NO_THROW(dt =
DataTypeFactory::instance().create_data_type(meta));
+ ASSERT_NE(dt, nullptr);
+ EXPECT_EQ(dt->get_primitive_type(), TYPE_DECIMALV2);
+ const auto* decv2 = assert_cast<const DataTypeDecimalV2*>(dt.get());
+ EXPECT_EQ(decv2->get_precision(), BeConsts::MAX_DECIMALV2_PRECISION);
+ EXPECT_EQ(decv2->get_scale(), BeConsts::MAX_DECIMALV2_SCALE);
+ EXPECT_EQ(decv2->get_original_precision(),
BeConsts::MAX_DECIMALV2_PRECISION);
+ EXPECT_EQ(decv2->get_original_scale(), BeConsts::MAX_DECIMALV2_SCALE);
+ }
+}
+
+// Regression for complex types wrapping legacy DecimalV2 children whose
+// segment ColumnMetaPB.precision/frac default to 0. The fix in
+// DataTypeFactory::create_data_type(segment_v2::ColumnMetaPB&) must
+// propagate through Array / Map / Struct / AggState recursively.
+TEST_F(DataTypeDecimalTest, create_complex_types_with_legacy_decimalv2) {
+ auto make_legacy_decv2_meta = []() {
+ segment_v2::ColumnMetaPB child;
+ child.set_type(static_cast<int>(FieldType::OLAP_FIELD_TYPE_DECIMAL));
+ // precision/frac intentionally unset -> default 0
+ return child;
+ };
+ auto unwrap_nullable = [](const DataTypePtr& dt) -> DataTypePtr {
+ if (dt && dt->is_nullable()) {
+ return assert_cast<const
DataTypeNullable*>(dt.get())->get_nested_type();
+ }
+ return dt;
+ };
+
+ // Array<DecimalV2>
+ {
+ segment_v2::ColumnMetaPB meta;
+ meta.set_type(static_cast<int>(FieldType::OLAP_FIELD_TYPE_ARRAY));
+ *meta.add_children_columns() = make_legacy_decv2_meta();
+ DataTypePtr dt;
+ EXPECT_NO_THROW(dt =
DataTypeFactory::instance().create_data_type(meta));
+ ASSERT_NE(dt, nullptr);
+ EXPECT_EQ(dt->get_primitive_type(), TYPE_ARRAY);
+ const auto* arr = assert_cast<const DataTypeArray*>(dt.get());
+ auto elem = unwrap_nullable(arr->get_nested_type());
+ EXPECT_EQ(elem->get_primitive_type(), TYPE_DECIMALV2);
+ const auto* decv2 = assert_cast<const DataTypeDecimalV2*>(elem.get());
+ EXPECT_EQ(decv2->get_original_precision(),
BeConsts::MAX_DECIMALV2_PRECISION);
+ EXPECT_EQ(decv2->get_original_scale(), BeConsts::MAX_DECIMALV2_SCALE);
+ }
+
+ // Map<Int, DecimalV2>
+ {
+ segment_v2::ColumnMetaPB meta;
+ meta.set_type(static_cast<int>(FieldType::OLAP_FIELD_TYPE_MAP));
+ auto* k = meta.add_children_columns();
+ k->set_type(static_cast<int>(FieldType::OLAP_FIELD_TYPE_INT));
+ *meta.add_children_columns() = make_legacy_decv2_meta();
+ DataTypePtr dt;
+ EXPECT_NO_THROW(dt =
DataTypeFactory::instance().create_data_type(meta));
+ ASSERT_NE(dt, nullptr);
+ EXPECT_EQ(dt->get_primitive_type(), TYPE_MAP);
+ const auto* m = assert_cast<const DataTypeMap*>(dt.get());
+ EXPECT_EQ(unwrap_nullable(m->get_key_type())->get_primitive_type(),
TYPE_INT);
+ auto v = unwrap_nullable(m->get_value_type());
+ EXPECT_EQ(v->get_primitive_type(), TYPE_DECIMALV2);
+ const auto* decv2 = assert_cast<const DataTypeDecimalV2*>(v.get());
+ EXPECT_EQ(decv2->get_original_precision(),
BeConsts::MAX_DECIMALV2_PRECISION);
+ }
+
+ // Struct<DecimalV2, Int>
+ {
+ segment_v2::ColumnMetaPB meta;
+ meta.set_type(static_cast<int>(FieldType::OLAP_FIELD_TYPE_STRUCT));
+ *meta.add_children_columns() = make_legacy_decv2_meta();
+ auto* second = meta.add_children_columns();
+ second->set_type(static_cast<int>(FieldType::OLAP_FIELD_TYPE_INT));
+ DataTypePtr dt;
+ EXPECT_NO_THROW(dt =
DataTypeFactory::instance().create_data_type(meta));
+ ASSERT_NE(dt, nullptr);
+ EXPECT_EQ(dt->get_primitive_type(), TYPE_STRUCT);
+ const auto* s = assert_cast<const DataTypeStruct*>(dt.get());
+ ASSERT_EQ(s->get_elements().size(), 2u);
+ EXPECT_EQ(unwrap_nullable(s->get_element(0))->get_primitive_type(),
TYPE_DECIMALV2);
+ EXPECT_EQ(unwrap_nullable(s->get_element(1))->get_primitive_type(),
TYPE_INT);
+ }
+
+ // Nested: Array<Map<Int, DecimalV2>>
+ {
+ segment_v2::ColumnMetaPB meta;
+ meta.set_type(static_cast<int>(FieldType::OLAP_FIELD_TYPE_ARRAY));
+ auto* map_child = meta.add_children_columns();
+ map_child->set_type(static_cast<int>(FieldType::OLAP_FIELD_TYPE_MAP));
+ auto* k = map_child->add_children_columns();
+ k->set_type(static_cast<int>(FieldType::OLAP_FIELD_TYPE_INT));
+ *map_child->add_children_columns() = make_legacy_decv2_meta();
+ DataTypePtr dt;
+ EXPECT_NO_THROW(dt =
DataTypeFactory::instance().create_data_type(meta));
+ ASSERT_NE(dt, nullptr);
+ EXPECT_EQ(dt->get_primitive_type(), TYPE_ARRAY);
+ auto inner =
+ unwrap_nullable(assert_cast<const
DataTypeArray*>(dt.get())->get_nested_type());
+ EXPECT_EQ(inner->get_primitive_type(), TYPE_MAP);
+ auto v = unwrap_nullable(assert_cast<const
DataTypeMap*>(inner.get())->get_value_type());
+ EXPECT_EQ(v->get_primitive_type(), TYPE_DECIMALV2);
+ }
+
+ // AggState (count) with legacy DecimalV2 sub-type
+ {
+ segment_v2::ColumnMetaPB meta;
+ meta.set_type(static_cast<int>(FieldType::OLAP_FIELD_TYPE_AGG_STATE));
+ meta.set_function_name("count");
+ meta.set_result_is_nullable(false);
+ meta.set_be_exec_version(BeExecVersionManager::get_newest_version());
+ *meta.add_children_columns() = make_legacy_decv2_meta();
+ DataTypePtr dt;
+ EXPECT_NO_THROW(dt =
DataTypeFactory::instance().create_data_type(meta));
+ ASSERT_NE(dt, nullptr);
+ EXPECT_EQ(dt->get_primitive_type(), TYPE_AGG_STATE);
+ const auto* agg = assert_cast<const DataTypeAggState*>(dt.get());
+ ASSERT_EQ(agg->get_sub_types().size(), 1u);
+ auto sub = unwrap_nullable(agg->get_sub_types()[0]);
+ EXPECT_EQ(sub->get_primitive_type(), TYPE_DECIMALV2);
+ const auto* decv2 = assert_cast<const DataTypeDecimalV2*>(sub.get());
+ EXPECT_EQ(decv2->get_original_precision(),
BeConsts::MAX_DECIMALV2_PRECISION);
+ EXPECT_EQ(decv2->get_original_scale(), BeConsts::MAX_DECIMALV2_SCALE);
+ }
+}
+
TEST_F(DataTypeDecimalTest, to_pb_column_meta) {
auto test_func = [](auto dt, PGenericType_TypeId expected_type) {
auto col_meta = std::make_shared<PColumnMeta>();
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]