This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch branch-2.1 in repository https://gitbox.apache.org/repos/asf/doris.git
commit 691f3c5ee7a36a4ae374dbef731f25ce5c2bf20d Author: lihangyu <15605149...@163.com> AuthorDate: Sat May 11 11:31:24 2024 +0800 [Performance](Variant) Improve load performance for variant type (#33890) 1. remove phmap for padding rows 2. add SimpleFieldVisitorToScalarType for short-circuit type deduction 3. correct type coercion for conflicting types between integers 4. improve nullable column performance 5. remove shared_ptr dependency for DataType; use TypeIndex instead 6. Optimization by caching the order of fields (which is almost always the same) and a quick check to match the next expected field, instead of searching the hash table. benchmark: In clickbench data, load performance: 12m36.799s -> 7m10.934s, about 43% latency reduction In variant_p2/performance.groovy: 3min44s20 -> 1min15s80, about 66% latency reduction --- be/src/vec/columns/column_object.cpp | 189 ++++++++++++++++----- be/src/vec/columns/column_object.h | 28 ++- be/src/vec/common/schema_util.cpp | 57 ++++--- be/src/vec/common/schema_util.h | 4 +- be/src/vec/core/field.h | 5 + be/src/vec/json/parse2column.cpp | 22 +-- .../suites/variant_p2/performance.groovy | 36 ++++ 7 files changed, 248 insertions(+), 93 deletions(-) diff --git a/be/src/vec/columns/column_object.cpp b/be/src/vec/columns/column_object.cpp index ddb5bee6e01..3bae978f4d3 100644 --- a/be/src/vec/columns/column_object.cpp +++ b/be/src/vec/columns/column_object.cpp @@ -45,6 +45,7 @@ #include "util/defer_op.h" #include "util/simd/bits.h" #include "vec/aggregate_functions/aggregate_function.h" +#include "vec/aggregate_functions/helpers.h" #include "vec/columns/column.h" #include "vec/columns/column_array.h" #include "vec/columns/column_nullable.h" @@ -56,6 +57,7 @@ #include "vec/common/field_visitors.h" #include "vec/common/schema_util.h" #include "vec/common/string_buffer.hpp" +#include "vec/common/string_ref.h" #include "vec/core/column_with_type_and_name.h" #include "vec/core/field.h" #include "vec/core/types.h" @@ -68,6 +70,7 @@ #include
"vec/data_types/data_type_nothing.h" #include "vec/data_types/data_type_nullable.h" #include "vec/data_types/get_least_supertype.h" +#include "vec/json/path_in_data.h" #ifdef __AVX2__ #include "util/jsonb_parser_simd.h" @@ -78,23 +81,22 @@ namespace doris::vectorized { namespace { -DataTypePtr create_array_of_type(DataTypePtr type, size_t num_dimensions, bool is_nullable) { - const DataTypeNullable* nullable = typeid_cast<const DataTypeNullable*>(type.get()); - if ((nullable && - typeid_cast<const ColumnObject::MostCommonType*>(nullable->get_nested_type().get())) || - typeid_cast<const ColumnObject::MostCommonType*>(type.get())) { +DataTypePtr create_array_of_type(TypeIndex type, size_t num_dimensions, bool is_nullable) { + if (type == ColumnObject::MOST_COMMON_TYPE_ID) { // JSONB type MUST NOT wrapped in ARRAY column, it should be top level. // So we ignored num_dimensions. - return type; + return is_nullable ? make_nullable(std::make_shared<ColumnObject::MostCommonType>()) + : std::make_shared<ColumnObject::MostCommonType>(); } + DataTypePtr result = DataTypeFactory::instance().create_data_type(type, is_nullable); for (size_t i = 0; i < num_dimensions; ++i) { - type = std::make_shared<DataTypeArray>(std::move(type)); + result = std::make_shared<DataTypeArray>(result); if (is_nullable) { // wrap array with nullable - type = make_nullable(type); + result = make_nullable(result); } } - return type; + return result; } DataTypePtr get_base_type_of_array(const DataTypePtr& type) { @@ -149,6 +151,63 @@ public: } }; +// Visitor that allows to get type of scalar field +// but exclude fields contain complex field.This is a faster version +// for FieldVisitorToScalarType which does not support complex field. 
+class SimpleFieldVisitorToScalarType : public StaticVisitor<size_t> {
+public:
+    size_t operator()(const Array& x) {
+        throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Array type is not supported");
+    }
+    size_t operator()(const UInt64& x) { // pick the narrowest signed integer type that holds x
+        if (x <= std::numeric_limits<Int8>::max()) {
+            type = TypeIndex::Int8;
+        } else if (x <= std::numeric_limits<Int16>::max()) {
+            type = TypeIndex::Int16;
+        } else if (x <= std::numeric_limits<Int32>::max()) {
+            type = TypeIndex::Int32;
+        } else { // NOTE(review): values above the Int64 maximum also land here -- confirm intended
+            type = TypeIndex::Int64;
+        }
+        return 1;
+    }
+    size_t operator()(const Int64& x) { // pick the narrowest signed integer type that holds x
+        if (x <= std::numeric_limits<Int8>::max() && x >= std::numeric_limits<Int8>::min()) {
+            type = TypeIndex::Int8;
+        } else if (x <= std::numeric_limits<Int16>::max() &&
+                   x >= std::numeric_limits<Int16>::min()) {
+            type = TypeIndex::Int16;
+        } else if (x <= std::numeric_limits<Int32>::max() &&
+                   x >= std::numeric_limits<Int32>::min()) {
+            type = TypeIndex::Int32;
+        } else {
+            type = TypeIndex::Int64;
+        }
+        return 1;
+    }
+    size_t operator()(const JsonbField& x) {
+        type = TypeIndex::JSONB;
+        return 1;
+    }
+    size_t operator()(const Null&) { // leaves `type` untouched (Nothing unless set earlier)
+        have_nulls = true;
+        return 1;
+    }
+    template <typename T>
+    size_t operator()(const T&) { // every other scalar: type id of its nearest field type
+        type = TypeId<NearestFieldType<T>>::value;
+        return 1;
+    }
+    void get_scalar_type(TypeIndex* data_type) const { *data_type = type; }
+    bool contain_nulls() const { return have_nulls; }
+
+    bool need_convert_field() const { return false; } // this visitor never requests conversion
+
+private:
+    TypeIndex type = TypeIndex::Nothing;
+    bool have_nulls = false; // was uninitialized: only the Null overload writes it, yet it is always read
+};
+
 /// Visitor that allows to get type of scalar field
 /// or least common type of scalars in array.
 /// More optimized version of FieldToDataType.
@@ -208,8 +267,10 @@ public: type_indexes.insert(TypeId<NearestFieldType<T>>::value); return 0; } - void get_scalar_type(DataTypePtr* type) const { - get_least_supertype<LeastSupertypeOnError::Jsonb>(type_indexes, type); + void get_scalar_type(TypeIndex* type) const { + DataTypePtr data_type; + get_least_supertype<LeastSupertypeOnError::Jsonb>(type_indexes, &data_type); + *type = data_type->get_type_id(); } bool contain_nulls() const { return have_nulls; } bool need_convert_field() const { return field_types.size() > 1; } @@ -221,20 +282,30 @@ private: }; } // namespace -void get_field_info(const Field& field, FieldInfo* info) { - FieldVisitorToScalarType to_scalar_type_visitor; + +template <typename Visitor> +void get_field_info_impl(const Field& field, FieldInfo* info) { + Visitor to_scalar_type_visitor; apply_visitor(to_scalar_type_visitor, field); - DataTypePtr type = nullptr; - to_scalar_type_visitor.get_scalar_type(&type); + TypeIndex type_id; + to_scalar_type_visitor.get_scalar_type(&type_id); // array item's dimension may missmatch, eg. 
[1, 2, [1, 2, 3]] *info = { - type, + type_id, to_scalar_type_visitor.contain_nulls(), to_scalar_type_visitor.need_convert_field(), apply_visitor(FieldVisitorToNumberOfDimensions(), field), }; } +void get_field_info(const Field& field, FieldInfo* info) { + if (field.is_complex_field()) { + get_field_info_impl<FieldVisitorToScalarType>(field, info); + } else { + get_field_info_impl<SimpleFieldVisitorToScalarType>(field, info); + } +} + ColumnObject::Subcolumn::Subcolumn(MutableColumnPtr&& data_, DataTypePtr type, bool is_nullable_, bool is_root_) : least_common_type(type), is_nullable(is_nullable_), is_root(is_root_) { @@ -285,8 +356,8 @@ void ColumnObject::Subcolumn::add_new_column_part(DataTypePtr type) { } void ColumnObject::Subcolumn::insert(Field field, FieldInfo info) { - auto base_type = std::move(info.scalar_type); - if (is_nothing(base_type)) { + auto base_type = WhichDataType(info.scalar_type_id); + if (base_type.is_nothing()) { insertDefault(); return; } @@ -295,7 +366,7 @@ void ColumnObject::Subcolumn::insert(Field field, FieldInfo info) { if (is_nothing(least_common_type.get_base())) { column_dim = value_dim; } - if (is_nothing(base_type)) { + if (base_type.is_nothing()) { value_dim = column_dim; } bool type_changed = false; @@ -305,29 +376,30 @@ void ColumnObject::Subcolumn::insert(Field field, FieldInfo info) { "Dimension of types mismatched between inserted value and column, " "expected:{}, but meet:{} for type:{}", column_dim, value_dim, least_common_type.get()->get_name()); - base_type = std::make_shared<MostCommonType>(); + base_type = MOST_COMMON_TYPE_ID; value_dim = 0; type_changed = true; } - if (is_nullable && !is_nothing(base_type)) { - base_type = make_nullable(base_type); - } - - const auto& least_common_base_type = least_common_type.get_base(); if (data.empty()) { - add_new_column_part(create_array_of_type(std::move(base_type), value_dim, is_nullable)); - } else if (!least_common_base_type->equals(*base_type) && !is_nothing(base_type)) { - 
if (!schema_util::is_conversion_required_between_integers(*base_type, - *least_common_base_type)) { + add_new_column_part(create_array_of_type(base_type.idx, value_dim, is_nullable)); + } else if (least_common_type.get_type_id() != base_type.idx && !base_type.is_nothing()) { + if (schema_util::is_conversion_required_between_integers(base_type.idx, + least_common_type.get_type_id())) { + LOG_EVERY_N(INFO, 100) << "Conversion between " << getTypeName(base_type.idx) << " and " + << getTypeName(least_common_type.get_type_id()); + DataTypePtr base_data_type; + TypeIndex base_data_type_id; get_least_supertype<LeastSupertypeOnError::Jsonb>( - DataTypes {std::move(base_type), least_common_base_type}, &base_type); + TypeIndexSet {base_type.idx, least_common_type.get_base_type_id()}, + &base_data_type); type_changed = true; + base_data_type_id = base_data_type->get_type_id(); if (is_nullable) { - base_type = make_nullable(base_type); + base_data_type = make_nullable(base_data_type); } - if (!least_common_base_type->equals(*base_type)) { + if (!least_common_type.get_base()->equals(*base_data_type)) { add_new_column_part( - create_array_of_type(std::move(base_type), value_dim, is_nullable)); + create_array_of_type(base_data_type_id, value_dim, is_nullable)); } } } @@ -578,6 +650,14 @@ ColumnObject::Subcolumn::LeastCommonType::LeastCommonType(DataTypePtr type_) if (!WhichDataType(type).is_nothing()) { least_common_type_serder = type->get_serde(); } + type_id = type->is_nullable() ? assert_cast<const DataTypeNullable*>(type.get()) + ->get_nested_type() + ->get_type_id() + : type->get_type_id(); + base_type_id = base_type->is_nullable() ? 
assert_cast<const DataTypeNullable*>(base_type.get()) + ->get_nested_type() + ->get_type_id() + : base_type->get_type_id(); } ColumnObject::ColumnObject(bool is_nullable_, bool create_root_) @@ -677,14 +757,12 @@ void ColumnObject::try_insert(const Field& field) { return; } const auto& object = field.get<const VariantMap&>(); - phmap::flat_hash_set<std::string> inserted; size_t old_size = size(); for (const auto& [key_str, value] : object) { PathInData key; if (!key_str.empty()) { key = PathInData(key_str); } - inserted.insert(key_str); if (!has_subcolumn(key)) { bool succ = add_sub_column(key, old_size); if (!succ) { @@ -700,7 +778,7 @@ void ColumnObject::try_insert(const Field& field) { subcolumn->insert(value); } for (auto& entry : subcolumns) { - if (!inserted.contains(entry->path.get_path())) { + if (old_size == entry->data.size()) { entry->data.insertDefault(); } } @@ -749,16 +827,6 @@ Status ColumnObject::try_insert_indices_from(const IColumn& src, const int* indi return Status::OK(); } -FieldInfo ColumnObject::Subcolumn::get_subcolumn_field_info() const { - const auto& base_type = least_common_type.get_base(); - return FieldInfo { - .scalar_type = base_type, - .have_nulls = base_type->is_nullable(), - .need_convert = false, - .num_dimensions = least_common_type.get_dimensions(), - }; -} - void ColumnObject::insert_range_from(const IColumn& src, size_t start, size_t length) { #ifndef NDEBUG check_consistency(); @@ -809,6 +877,33 @@ const ColumnObject::Subcolumn* ColumnObject::get_subcolumn(const PathInData& key return &node->data; } +const ColumnObject::Subcolumn* ColumnObject::get_subcolumn_with_cache(const PathInData& key, + size_t key_index) const { + // Optimization by caching the order of fields (which is almost always the same) + // and a quick check to match the next expected field, instead of searching the hash table. 
+ if (_prev_positions.size() > key_index && _prev_positions[key_index].second != nullptr && + key == _prev_positions[key_index].first) { + return _prev_positions[key_index].second; + } + const auto* subcolumn = get_subcolumn(key); + if (key_index >= _prev_positions.size()) { + _prev_positions.resize(key_index + 1); + } + if (subcolumn != nullptr) { + _prev_positions[key_index] = std::make_pair(key, subcolumn); + } + return subcolumn; +} + +ColumnObject::Subcolumn* ColumnObject::get_subcolumn(const PathInData& key, size_t key_index) { + return const_cast<ColumnObject::Subcolumn*>(get_subcolumn_with_cache(key, key_index)); +} + +const ColumnObject::Subcolumn* ColumnObject::get_subcolumn(const PathInData& key, + size_t key_index) const { + return get_subcolumn_with_cache(key, key_index); +} + ColumnObject::Subcolumn* ColumnObject::get_subcolumn(const PathInData& key) { const auto* node = subcolumns.find_leaf(key); if (node == nullptr) { @@ -1238,6 +1333,7 @@ void ColumnObject::finalize(bool ignore_sparse) { } std::swap(subcolumns, new_subcolumns); doc_structure = nullptr; + _prev_positions.clear(); } void ColumnObject::finalize() { @@ -1356,6 +1452,7 @@ void ColumnObject::clear() { Subcolumns empty; std::swap(empty, subcolumns); num_rows = 0; + _prev_positions.clear(); } void ColumnObject::revise_to(int target_num_rows) { diff --git a/be/src/vec/columns/column_object.h b/be/src/vec/columns/column_object.h index 8573428ff2b..55abd534dd1 100644 --- a/be/src/vec/columns/column_object.h +++ b/be/src/vec/columns/column_object.h @@ -35,6 +35,7 @@ #include "common/status.h" #include "olap/tablet_schema.h" +#include "util/jsonb_document.h" #include "vec/columns/column.h" #include "vec/columns/subcolumn_tree.h" #include "vec/common/cow.h" @@ -62,8 +63,8 @@ namespace doris::vectorized { /// It allows to recreate field with different number /// of dimensions or nullability. struct FieldInfo { - /// The common type of of all scalars in field. 
- DataTypePtr scalar_type; + /// The common type id of of all scalars in field. + TypeIndex scalar_type_id; /// Do we have NULL scalar in field. bool have_nulls; /// If true then we have scalars with different types in array and @@ -72,6 +73,7 @@ struct FieldInfo { /// Number of dimension in array. 0 if field is scalar. size_t num_dimensions; }; + void get_field_info(const Field& field, FieldInfo* info); /** A column that represents object with dynamic set of subcolumns. * Subcolumns are identified by paths in document and are stored in @@ -91,6 +93,7 @@ public: // Using jsonb type as most common type, since it's adopted all types of json using MostCommonType = DataTypeJsonb; + constexpr static TypeIndex MOST_COMMON_TYPE_ID = TypeIndex::JSONB; class Subcolumn { public: Subcolumn() = default; @@ -147,8 +150,6 @@ public: /// Returns last inserted field. Field get_last_field() const; - FieldInfo get_subcolumn_field_info() const; - /// Returns single column if subcolumn in finalizes. /// Otherwise -- undefined behaviour. IColumn& get_finalized_column(); @@ -176,6 +177,10 @@ public: const DataTypePtr& get_base() const { return base_type; } + const TypeIndex& get_type_id() const { return type_id; } + + const TypeIndex& get_base_type_id() const { return base_type_id; } + size_t get_dimensions() const { return num_dimensions; } void remove_nullable() { type = doris::vectorized::remove_nullable(type); } @@ -185,6 +190,8 @@ public: private: DataTypePtr type; DataTypePtr base_type; + TypeIndex type_id; + TypeIndex base_type_id; size_t num_dimensions = 0; DataTypeSerDeSPtr least_common_type_serder; }; @@ -227,6 +234,10 @@ private: // used for quickly row store encoding ColumnPtr rowstore_column; + using SubColumnWithName = std::pair<PathInData, const Subcolumn*>; + // Cached search results for previous row (keyed as index in JSON object) - used as a hint. 
+ mutable std::vector<SubColumnWithName> _prev_positions; + public: static constexpr auto COLUMN_NAME_DUMMY = "_dummy"; @@ -289,6 +300,9 @@ public: // return null if not found const Subcolumn* get_subcolumn(const PathInData& key) const; + // return null if not found + const Subcolumn* get_subcolumn(const PathInData& key, size_t index_hint) const; + /** More efficient methods of manipulation */ [[noreturn]] IColumn& get_data() { LOG(FATAL) << "Not implemented method get_data()"; @@ -302,6 +316,12 @@ public: // return null if not found Subcolumn* get_subcolumn(const PathInData& key); + // return null if not found + Subcolumn* get_subcolumn(const PathInData& key, size_t index_hint); + + // return null if not found + const Subcolumn* get_subcolumn_with_cache(const PathInData& key, size_t index_hint) const; + void incr_num_rows() { ++num_rows; } void incr_num_rows(size_t n) { num_rows += n; } diff --git a/be/src/vec/common/schema_util.cpp b/be/src/vec/common/schema_util.cpp index 5c7a2f8482a..2f9e5ded212 100644 --- a/be/src/vec/common/schema_util.cpp +++ b/be/src/vec/common/schema_util.cpp @@ -109,36 +109,41 @@ Array create_empty_array_field(size_t num_dimensions) { return array; } -bool is_conversion_required_between_integers(const IDataType& lhs, const IDataType& rhs) { +size_t get_size_of_interger(TypeIndex type) { + switch (type) { + case TypeIndex::Int8: + return sizeof(int8_t); + case TypeIndex::Int16: + return sizeof(int16_t); + case TypeIndex::Int32: + return sizeof(int32_t); + case TypeIndex::Int64: + return sizeof(int64_t); + case TypeIndex::Int128: + return sizeof(int128_t); + case TypeIndex::UInt8: + return sizeof(uint8_t); + case TypeIndex::UInt16: + return sizeof(uint16_t); + case TypeIndex::UInt32: + return sizeof(uint32_t); + case TypeIndex::UInt64: + return sizeof(uint64_t); + case TypeIndex::UInt128: + return sizeof(uint128_t); + default: + LOG(FATAL) << "Unknown integer type: " << getTypeName(type); + return 0; + } +} + +bool 
is_conversion_required_between_integers(const TypeIndex& lhs, const TypeIndex& rhs) { WhichDataType which_lhs(lhs); WhichDataType which_rhs(rhs); bool is_native_int = which_lhs.is_native_int() && which_rhs.is_native_int(); bool is_native_uint = which_lhs.is_native_uint() && which_rhs.is_native_uint(); - return (is_native_int || is_native_uint) && - lhs.get_size_of_value_in_memory() <= rhs.get_size_of_value_in_memory(); -} - -bool is_conversion_required_between_integers(FieldType lhs, FieldType rhs) { - // We only support signed integers for semi-structure data at present - // TODO add unsigned integers - if (lhs == FieldType::OLAP_FIELD_TYPE_BIGINT) { - return !(rhs == FieldType::OLAP_FIELD_TYPE_TINYINT || - rhs == FieldType::OLAP_FIELD_TYPE_SMALLINT || - rhs == FieldType::OLAP_FIELD_TYPE_INT || rhs == FieldType::OLAP_FIELD_TYPE_BIGINT); - } - if (lhs == FieldType::OLAP_FIELD_TYPE_INT) { - return !(rhs == FieldType::OLAP_FIELD_TYPE_TINYINT || - rhs == FieldType::OLAP_FIELD_TYPE_SMALLINT || - rhs == FieldType::OLAP_FIELD_TYPE_INT); - } - if (lhs == FieldType::OLAP_FIELD_TYPE_SMALLINT) { - return !(rhs == FieldType::OLAP_FIELD_TYPE_TINYINT || - rhs == FieldType::OLAP_FIELD_TYPE_SMALLINT); - } - if (lhs == FieldType::OLAP_FIELD_TYPE_TINYINT) { - return !(rhs == FieldType::OLAP_FIELD_TYPE_TINYINT); - } - return true; + return (!is_native_int && !is_native_uint) || + get_size_of_interger(lhs) > get_size_of_interger(rhs); } Status cast_column(const ColumnWithTypeAndName& arg, const DataTypePtr& type, ColumnPtr* result) { diff --git a/be/src/vec/common/schema_util.h b/be/src/vec/common/schema_util.h index e6ed60480f5..078081593c5 100644 --- a/be/src/vec/common/schema_util.h +++ b/be/src/vec/common/schema_util.h @@ -34,6 +34,7 @@ #include "vec/columns/column_object.h" #include "vec/core/columns_with_type_and_name.h" #include "vec/core/field.h" +#include "vec/core/types.h" #include "vec/data_types/data_type.h" #include "vec/json/path_in_data.h" @@ -66,8 +67,7 @@ Status 
cast_column(const ColumnWithTypeAndName& arg, const DataTypePtr& type, Co /// If both of types are signed/unsigned integers and size of left field type /// is less than right type, we don't need to convert field, /// because all integer fields are stored in Int64/UInt64. -bool is_conversion_required_between_integers(const IDataType& lhs, const IDataType& rhs); -bool is_conversion_required_between_integers(FieldType lhs, FieldType rhs); +bool is_conversion_required_between_integers(const TypeIndex& lhs, const TypeIndex& rhs); struct ExtraInfo { // -1 indicates it's not a Frontend generated column diff --git a/be/src/vec/core/field.h b/be/src/vec/core/field.h index 356216e7074..de2d544e7e0 100644 --- a/be/src/vec/core/field.h +++ b/be/src/vec/core/field.h @@ -493,6 +493,11 @@ public: return *this; } + bool is_complex_field() const { + return which == Types::Array || which == Types::Map || which == Types::Tuple || + which == Types::VariantMap; + } + Field& operator=(Field&& rhs) { if (this != &rhs) { if (which != rhs.which) { diff --git a/be/src/vec/json/parse2column.cpp b/be/src/vec/json/parse2column.cpp index cc3c649bb70..0f61e24dad7 100644 --- a/be/src/vec/json/parse2column.cpp +++ b/be/src/vec/json/parse2column.cpp @@ -148,36 +148,28 @@ void parse_json_to_variant(IColumn& column, const char* src, size_t length, } auto& [paths, values] = *result; assert(paths.size() == values.size()); - phmap::flat_hash_set<std::string> paths_set; - size_t num_rows = column_object.size(); + size_t old_num_rows = column_object.size(); for (size_t i = 0; i < paths.size(); ++i) { FieldInfo field_info; get_field_info(values[i], &field_info); - if (is_nothing(field_info.scalar_type)) { + if (WhichDataType(field_info.scalar_type_id).is_nothing()) { continue; } - if (!paths_set.insert(paths[i].get_path()).second) { - // return Status::DataQualityError( - // fmt::format("Object has ambiguous path {}, {}", paths[i].get_path())); - throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Object 
has ambiguous path {}", - paths[i].get_path()); - } - - if (!column_object.has_subcolumn(paths[i])) { - column_object.add_sub_column(paths[i], num_rows); + if (column_object.get_subcolumn(paths[i], i) == nullptr) { + column_object.add_sub_column(paths[i], old_num_rows); } - auto* subcolumn = column_object.get_subcolumn(paths[i]); + auto* subcolumn = column_object.get_subcolumn(paths[i], i); if (!subcolumn) { throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Failed to find sub column {}", paths[i].get_path()); } - assert(subcolumn->size() == num_rows); + DCHECK_EQ(subcolumn->size(), old_num_rows); subcolumn->insert(std::move(values[i]), std::move(field_info)); } // /// Insert default values to missed subcolumns. const auto& subcolumns = column_object.get_subcolumns(); for (const auto& entry : subcolumns) { - if (!paths_set.contains(entry->path.get_path())) { + if (entry->data.size() == old_num_rows) { entry->data.insertDefault(); } } diff --git a/regression-test/suites/variant_p2/performance.groovy b/regression-test/suites/variant_p2/performance.groovy new file mode 100644 index 00000000000..1f10dd90c04 --- /dev/null +++ b/regression-test/suites/variant_p2/performance.groovy @@ -0,0 +1,36 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("regression_test_variant_performance", "p2"){ + sql """CREATE TABLE IF NOT EXISTS var_perf ( + k bigint, + v variant + + ) + DUPLICATE KEY(`k`) + DISTRIBUTED BY RANDOM BUCKETS 4 + properties("replication_num" = "1", "disable_auto_compaction" = "false"); + """ + sql """ + insert into var_perf + SELECT *, '{"field1":348,"field2":596,"field3":781,"field4":41,"field5":922,"field6":84,"field7":222,"field8":312,"field9":490,"field10":715,"field11":837,"field12":753,"field13":171,"field14":727,"field15":739,"field16":545,"field17":964,"field18":540,"field19":685,"field20":828,"field21":157,"field22":404,"field23":287,"field24":481,"field25":476,"field26":559,"field27":144,"field28":545,"field29":70,"field30":668,"field31":820,"field32":193,"field33":465,"field34":347, [...] + from numbers("number" = "10000000") + union all + SELECT *, '{"field1":201,"field2":465,"field3":977,"field4":101112,"field5":131415,"field6":216,"field7":192021,"field8":822324,"field9":525627,"field10":928930,"field11":413233,"field12":243536,"field13":373839,"field14":404142,"field15":434445,"field16":1464748,"field17":495051,"field18":525354,"field19":565657,"field20":1585960,"field21":616263,"field22":646566,"field23":676869,"field24":707172,"field25":737475,"field26":767778,"field27":798081,"field28":828384,"field2 [...] + from numbers("number" = "10000000") + """ +} \ No newline at end of file --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org