This is an automated email from the ASF dual-hosted git repository. kxiao pushed a commit to branch branch-2.0 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push: new 7d933200c8a [FIX](map)fix rowstore with map #28877 (#29047) 7d933200c8a is described below commit 7d933200c8ae955ff9ae481a79dffe4487b07bd1 Author: amory <wangqian...@selectdb.com> AuthorDate: Wed Dec 27 14:40:40 2023 +0800 [FIX](map)fix rowstore with map #28877 (#29047) --- be/src/vec/columns/column_map.cpp | 2 +- be/test/vec/jsonb/serialize_test.cpp | 157 +++++++++++++++++++++ .../query/test_nested_type_with_rowstore.out | 15 ++ .../datatype_p0/nested_types/query/varchar.tsv | 2 + .../query/test_nested_type_with_rowstore.groovy | 58 ++++++++ 5 files changed, 233 insertions(+), 1 deletion(-) diff --git a/be/src/vec/columns/column_map.cpp b/be/src/vec/columns/column_map.cpp index fedb45327ae..46a448fdd32 100644 --- a/be/src/vec/columns/column_map.cpp +++ b/be/src/vec/columns/column_map.cpp @@ -220,7 +220,7 @@ StringRef ColumnMap::serialize_value_into_arena(size_t n, Arena& arena, char con const char* ColumnMap::deserialize_and_insert_from_arena(const char* pos) { size_t array_size = unaligned_load<size_t>(pos); - pos += 2 * sizeof(array_size); + pos += sizeof(array_size); for (size_t i = 0; i < array_size; ++i) { pos = get_keys().deserialize_and_insert_from_arena(pos); diff --git a/be/test/vec/jsonb/serialize_test.cpp b/be/test/vec/jsonb/serialize_test.cpp index bff79435ad4..a913c04cb4f 100644 --- a/be/test/vec/jsonb/serialize_test.cpp +++ b/be/test/vec/jsonb/serialize_test.cpp @@ -44,8 +44,10 @@ #include "vec/columns/column_array.h" #include "vec/columns/column_complex.h" #include "vec/columns/column_decimal.h" +#include "vec/columns/column_map.h" #include "vec/columns/column_nullable.h" #include "vec/columns/column_string.h" +#include "vec/columns/column_struct.h" #include "vec/columns/column_vector.h" #include "vec/core/block.h" #include "vec/core/column_with_type_and_name.h" @@ -56,9 +58,11 @@ #include "vec/data_types/data_type_bitmap.h" #include "vec/data_types/data_type_decimal.h" #include "vec/data_types/data_type_hll.h" +#include "vec/data_types/data_type_map.h" #include "vec/data_types/data_type_nullable.h" #include "vec/data_types/data_type_number.h" #include "vec/data_types/data_type_string.h" +#include "vec/data_types/data_type_struct.h" #include "vec/data_types/data_type_time_v2.h" #include "vec/data_types/serde/data_type_serde.h" #include "vec/runtime/vdatetime_value.h" @@ -177,6 +181,159 @@ TEST(BlockSerializeTest, Array) { EXPECT_EQ(block.dump_data(), new_block.dump_data()); } +TEST(BlockSerializeTest, Map) { + TabletSchema schema; + TabletColumn map; + map.set_name("m"); + map.set_unique_id(1); + map.set_type(FieldType::OLAP_FIELD_TYPE_MAP); + schema.append_column(map); + // map string string + DataTypePtr s = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>()); + DataTypePtr d = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>()); + DataTypePtr m = std::make_shared<DataTypeMap>(s, d); + Array k1, k2, v1, v2; + k1.push_back("null"); + k1.push_back("doris"); + k1.push_back("clever amory"); + v1.push_back("ss"); + v1.push_back(Null()); + v1.push_back("NULL"); + k2.push_back("hello amory"); + k2.push_back("NULL"); + k2.push_back("cute amory"); + k2.push_back("doris"); + v2.push_back("s"); + v2.push_back("0"); + v2.push_back("sf"); + v2.push_back(Null()); + Map m1, m2; + m1.push_back(k1); + m1.push_back(v1); + m2.push_back(k2); + m2.push_back(v2); + MutableColumnPtr map_column = m->create_column(); + map_column->reserve(2); + map_column->insert(m1); + map_column->insert(m2); + vectorized::ColumnWithTypeAndName type_and_name(map_column->get_ptr(), m, "test_map"); + vectorized::Block block; + block.insert(type_and_name); + + MutableColumnPtr col = ColumnString::create(); + // serialize + std::cout << "serialize to jsonb" << std::endl; + JsonbSerializeUtil::block_to_jsonb(schema, block, static_cast<ColumnString&>(*col.get()), + block.columns(), + create_data_type_serdes(block.get_data_types())); + // deserialize + TupleDescriptor read_desc(PTupleDescriptor(), true); + // slot + TSlotDescriptor tslot; + tslot.__set_colName("m"); + tslot.nullIndicatorBit = -1; + tslot.nullIndicatorByte = 0; + TypeDescriptor type_desc(TYPE_MAP); + type_desc.children.push_back(TypeDescriptor(TYPE_STRING)); + type_desc.children.push_back(TypeDescriptor(TYPE_INT)); + type_desc.contains_nulls.push_back(true); + type_desc.contains_nulls.push_back(true); + tslot.__set_col_unique_id(1); + tslot.__set_slotType(type_desc.to_thrift()); + SlotDescriptor* slot = new SlotDescriptor(tslot); + read_desc.add_slot(slot); + + Block new_block = block.clone_empty(); + std::unordered_map<uint32_t, uint32_t> col_uid_to_idx; + std::vector<std::string> default_values; + default_values.resize(read_desc.slots().size()); + for (int i = 0; i < read_desc.slots().size(); ++i) { + col_uid_to_idx[read_desc.slots()[i]->col_unique_id()] = i; + default_values[i] = read_desc.slots()[i]->col_default_value(); + std::cout << "uid " << read_desc.slots()[i]->col_unique_id() << ":" << i << std::endl; + } + std::cout << block.dump_data() << std::endl; + std::cout << new_block.dump_data() << std::endl; + std::cout << "deserialize from jsonb" << std::endl; + JsonbSerializeUtil::jsonb_to_block(create_data_type_serdes(read_desc.slots()), + static_cast<ColumnString&>(*col.get()), col_uid_to_idx, + new_block, default_values); + std::cout << block.dump_data() << std::endl; + std::cout << new_block.dump_data() << std::endl; + EXPECT_EQ(block.dump_data(), new_block.dump_data()); +} + +TEST(BlockSerializeTest, Struct) { + TabletSchema schema; + TabletColumn struct_col; + struct_col.set_name("struct"); + struct_col.set_unique_id(1); + struct_col.set_type(FieldType::OLAP_FIELD_TYPE_STRUCT); + schema.append_column(struct_col); + vectorized::Block block; + { + DataTypePtr s = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>()); + DataTypePtr d = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeInt128>()); + DataTypePtr m = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeUInt8>()); + DataTypePtr st = std::make_shared<DataTypeStruct>(std::vector<DataTypePtr> {s, d, m}); + Tuple t1, t2; + t1.push_back(String("amory cute")); + t1.push_back(__int128_t(37)); + t1.push_back(true); + t2.push_back("null"); + t2.push_back(__int128_t(26)); + t2.push_back(false); + MutableColumnPtr struct_column = st->create_column(); + struct_column->reserve(2); + struct_column->insert(t1); + struct_column->insert(t2); + vectorized::ColumnWithTypeAndName type_and_name(struct_column->get_ptr(), st, + "test_struct"); + block.insert(type_and_name); + } + + MutableColumnPtr col = ColumnString::create(); + // serialize + std::cout << "serialize to jsonb" << std::endl; + JsonbSerializeUtil::block_to_jsonb(schema, block, static_cast<ColumnString&>(*col.get()), + block.columns(), + create_data_type_serdes(block.get_data_types())); + // deserialize + TupleDescriptor read_desc(PTupleDescriptor(), true); + // slot + TSlotDescriptor tslot; + tslot.__set_colName("struct"); + tslot.nullIndicatorBit = -1; + tslot.nullIndicatorByte = 0; + TypeDescriptor type_desc(TYPE_STRUCT); + type_desc.add_sub_type(TYPE_STRING, "name", true); + type_desc.add_sub_type(TYPE_LARGEINT, "age", true); + type_desc.add_sub_type(TYPE_BOOLEAN, "is", true); + tslot.__set_col_unique_id(1); + tslot.__set_slotType(type_desc.to_thrift()); + SlotDescriptor* slot = new SlotDescriptor(tslot); + read_desc.add_slot(slot); + + Block new_block = block.clone_empty(); + std::unordered_map<uint32_t, uint32_t> col_uid_to_idx; + std::vector<std::string> default_values; + default_values.resize(read_desc.slots().size()); + for (int i = 0; i < read_desc.slots().size(); ++i) { + col_uid_to_idx[read_desc.slots()[i]->col_unique_id()] = i; + default_values[i] = read_desc.slots()[i]->col_default_value(); + std::cout << "uid " << read_desc.slots()[i]->col_unique_id() << ":" << i << std::endl; + } + std::cout << block.dump_data() << std::endl; + std::cout << new_block.dump_data() << std::endl; + std::cout << "deserialize from jsonb" << std::endl; + JsonbSerializeUtil::jsonb_to_block(create_data_type_serdes(read_desc.slots()), + static_cast<ColumnString&>(*col.get()), col_uid_to_idx, + new_block, default_values); + std::cout << block.dump_data() << std::endl; + std::cout << new_block.dump_data() << std::endl; + EXPECT_EQ(block.dump_data(), new_block.dump_data()); +} + TEST(BlockSerializeTest, JsonbBlock) { vectorized::Block block; TabletSchema schema; diff --git a/regression-test/data/datatype_p0/nested_types/query/test_nested_type_with_rowstore.out b/regression-test/data/datatype_p0/nested_types/query/test_nested_type_with_rowstore.out new file mode 100644 index 00000000000..29cebe3421d --- /dev/null +++ b/regression-test/data/datatype_p0/nested_types/query/test_nested_type_with_rowstore.out @@ -0,0 +1,15 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !sql -- +1 doris1 {"jsonk1":123,"jsonk2":456} [100, 200] {"k1":10} {"a": 1, "b": 2} +2 doris2 {"jsonk3":333,"jsonk4":444} [300, 400] {"k2":20} {"a": 3, "b": 4} + +-- !sql -- +1 doris1 {"jsonk1":123,"jsonk2":456} [100, 200] {"k1":10} {"a": 1, "b": 2} + +-- !sql -- +1 apache doris {"jsonk1":123,"jsonk2":456} [100, 200] {"k1":10} {"a": 1, "b": 2} +2 apache doris 2.0 {"jsonk3":333,"jsonk4":444} [300, 400] {"k2":20} {"a": 3, "b": 4} + +-- !sql -- +1 apache doris {"jsonk1":123,"jsonk2":456} [100, 200] {"k1":10} {"a": 1, "b": 2} + diff --git a/regression-test/data/datatype_p0/nested_types/query/varchar.tsv b/regression-test/data/datatype_p0/nested_types/query/varchar.tsv new file mode 100644 index 00000000000..d7f794dd4ac --- /dev/null +++ b/regression-test/data/datatype_p0/nested_types/query/varchar.tsv @@ -0,0 +1,2 @@ +1 apache doris +2 apache doris 2.0 diff --git a/regression-test/suites/datatype_p0/nested_types/query/test_nested_type_with_rowstore.groovy b/regression-test/suites/datatype_p0/nested_types/query/test_nested_type_with_rowstore.groovy new file mode 100644 index 00000000000..4f8d0e741eb --- /dev/null +++ b/regression-test/suites/datatype_p0/nested_types/query/test_nested_type_with_rowstore.groovy @@ -0,0 +1,58 @@ +import org.apache.commons.lang3.StringUtils + +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_nested_type_with_rowstore") { + // this test case aim to test nested type with old planner + sql """set enable_nereids_planner=false""" + sql """ DROP TABLE IF EXISTS ct_table;""" + sql """CREATE TABLE ct_table ( `id` int(11) NOT NULL COMMENT "用户 ID", `c_varchar` varchar(65533) NULL COMMENT "用户姓名", `c_jsonb` JSONB NULL, `c_array` ARRAY<INT> NULL, `c_map` MAP<STRING, INT> NULL, `c_struct` STRUCT<a:INT, b:INT> NULL) UNIQUE KEY(`id`) DISTRIBUTED BY HASH(`id`) BUCKETS 1 PROPERTIES("replication_num" = "1", "enable_unique_key_merge_on_write" = "true", "store_row_column" = "true");""" + + sql """ insert into ct_table values(2, "doris2", '{"jsonk3": 333, "jsonk4": 444}', [300, 400], {"k2": 20}, {3, 4});""" + sql """ insert into ct_table values(1, "doris1", '{"jsonk1": 123, "jsonk2": 456}', [100, 200], {"k1": 10}, {1, 2});""" + + qt_sql """ select * from ct_table order by id;""" + // point sql + qt_sql """ select * from ct_table where id = 1""" + + // column refresh + streamLoad { + table "ct_table" + time 10000 + set 'partial_columns', 'true' + set 'strict_mode', 'false' + set 'columns', 'id,c_varchar' + file 'varchar.tsv' + + check { result, exception, startTime, endTime -> + if (exception != null) { + throw exception + } + log.info("Stream load result: ${result}".toString()) + def json = parseJson(result) + assertEquals("success", json.Status.toLowerCase()) + assertEquals(2, json.NumberTotalRows) + assertEquals(0, json.NumberFilteredRows) + } + } + + // select and check + qt_sql """ select * from ct_table order by id;""" + // point sql + qt_sql """ select * from ct_table where id = 1""" +} --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org