This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new 7d933200c8a [FIX](map)fix rowstore with map #28877 (#29047)
7d933200c8a is described below

commit 7d933200c8ae955ff9ae481a79dffe4487b07bd1
Author: amory <wangqian...@selectdb.com>
AuthorDate: Wed Dec 27 14:40:40 2023 +0800

    [FIX](map)fix rowstore with map #28877 (#29047)
---
 be/src/vec/columns/column_map.cpp                  |   2 +-
 be/test/vec/jsonb/serialize_test.cpp               | 157 +++++++++++++++++++++
 .../query/test_nested_type_with_rowstore.out       |  15 ++
 .../datatype_p0/nested_types/query/varchar.tsv     |   2 +
 .../query/test_nested_type_with_rowstore.groovy    |  58 ++++++++
 5 files changed, 233 insertions(+), 1 deletion(-)

diff --git a/be/src/vec/columns/column_map.cpp b/be/src/vec/columns/column_map.cpp
index fedb45327ae..46a448fdd32 100644
--- a/be/src/vec/columns/column_map.cpp
+++ b/be/src/vec/columns/column_map.cpp
@@ -220,7 +220,7 @@ StringRef ColumnMap::serialize_value_into_arena(size_t n, Arena& arena, char con
 
 const char* ColumnMap::deserialize_and_insert_from_arena(const char* pos) {
     size_t array_size = unaligned_load<size_t>(pos);
-    pos += 2 * sizeof(array_size);
+    pos += sizeof(array_size);
 
     for (size_t i = 0; i < array_size; ++i) {
         pos = get_keys().deserialize_and_insert_from_arena(pos);
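
Note on the one-line fix above: deserialize_and_insert_from_arena reads a single size_t element count and then the per-element payloads, so the cursor should advance past that count exactly once. The old pos += 2 * sizeof(array_size) skipped an extra sizeof(size_t) bytes, leaving every subsequent read at the wrong offset (the commit title ties this to reading MAP values back from the row store). The sketch below is a minimal standalone illustration of that single-length-prefix layout and the corrected cursor arithmetic; the buffer format and the write_str/read_str helpers are invented for the example and are not Doris APIs.

#include <cstring>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

// Toy encoding of one map cell, mirroring the layout the fix assumes:
// [size_t n][key0][value0][key1][value1]..., each string as [size_t len][bytes].
static char* write_str(char* pos, const std::string& s) {
    size_t len = s.size();
    std::memcpy(pos, &len, sizeof(len));
    std::memcpy(pos + sizeof(len), s.data(), len);
    return pos + sizeof(len) + len;
}

static const char* read_str(const char* pos, std::string* out) {
    size_t len;
    std::memcpy(&len, pos, sizeof(len));
    out->assign(pos + sizeof(len), len);
    return pos + sizeof(len) + len;
}

int main() {
    std::vector<std::pair<std::string, std::string>> cell = {{"k1", "v1"}, {"k2", "v2"}};

    // Serialize: one length prefix, then the per-element payloads.
    char buf[256];
    char* w = buf;
    size_t n = cell.size();
    std::memcpy(w, &n, sizeof(n));
    w += sizeof(n);
    for (const auto& kv : cell) {
        w = write_str(w, kv.first);
        w = write_str(w, kv.second);
    }

    // Deserialize: advance past exactly one size_t, matching the writer.
    // Advancing by 2 * sizeof(size_t) (the old code) would also skip the first
    // key's length field, so raw key bytes would be read as a length and every
    // element after that would be garbage.
    const char* r = buf;
    size_t m;
    std::memcpy(&m, r, sizeof(m));
    r += sizeof(m);
    for (size_t i = 0; i < m; ++i) {
        std::string k, v;
        r = read_str(r, &k);
        r = read_str(r, &v);
        std::cout << k << " -> " << v << std::endl;
    }
    return 0;
}
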
diff --git a/be/test/vec/jsonb/serialize_test.cpp b/be/test/vec/jsonb/serialize_test.cpp
index bff79435ad4..a913c04cb4f 100644
--- a/be/test/vec/jsonb/serialize_test.cpp
+++ b/be/test/vec/jsonb/serialize_test.cpp
@@ -44,8 +44,10 @@
 #include "vec/columns/column_array.h"
 #include "vec/columns/column_complex.h"
 #include "vec/columns/column_decimal.h"
+#include "vec/columns/column_map.h"
 #include "vec/columns/column_nullable.h"
 #include "vec/columns/column_string.h"
+#include "vec/columns/column_struct.h"
 #include "vec/columns/column_vector.h"
 #include "vec/core/block.h"
 #include "vec/core/column_with_type_and_name.h"
@@ -56,9 +58,11 @@
 #include "vec/data_types/data_type_bitmap.h"
 #include "vec/data_types/data_type_decimal.h"
 #include "vec/data_types/data_type_hll.h"
+#include "vec/data_types/data_type_map.h"
 #include "vec/data_types/data_type_nullable.h"
 #include "vec/data_types/data_type_number.h"
 #include "vec/data_types/data_type_string.h"
+#include "vec/data_types/data_type_struct.h"
 #include "vec/data_types/data_type_time_v2.h"
 #include "vec/data_types/serde/data_type_serde.h"
 #include "vec/runtime/vdatetime_value.h"
@@ -177,6 +181,159 @@ TEST(BlockSerializeTest, Array) {
     EXPECT_EQ(block.dump_data(), new_block.dump_data());
 }
 
+TEST(BlockSerializeTest, Map) {
+    TabletSchema schema;
+    TabletColumn map;
+    map.set_name("m");
+    map.set_unique_id(1);
+    map.set_type(FieldType::OLAP_FIELD_TYPE_MAP);
+    schema.append_column(map);
+    // map string string
+    DataTypePtr s = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>());
+    DataTypePtr d = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>());
+    DataTypePtr m = std::make_shared<DataTypeMap>(s, d);
+    Array k1, k2, v1, v2;
+    k1.push_back("null");
+    k1.push_back("doris");
+    k1.push_back("clever amory");
+    v1.push_back("ss");
+    v1.push_back(Null());
+    v1.push_back("NULL");
+    k2.push_back("hello amory");
+    k2.push_back("NULL");
+    k2.push_back("cute amory");
+    k2.push_back("doris");
+    v2.push_back("s");
+    v2.push_back("0");
+    v2.push_back("sf");
+    v2.push_back(Null());
+    Map m1, m2;
+    m1.push_back(k1);
+    m1.push_back(v1);
+    m2.push_back(k2);
+    m2.push_back(v2);
+    MutableColumnPtr map_column = m->create_column();
+    map_column->reserve(2);
+    map_column->insert(m1);
+    map_column->insert(m2);
+    vectorized::ColumnWithTypeAndName type_and_name(map_column->get_ptr(), m, "test_map");
+    vectorized::Block block;
+    block.insert(type_and_name);
+
+    MutableColumnPtr col = ColumnString::create();
+    // serialize
+    std::cout << "serialize to jsonb" << std::endl;
+    JsonbSerializeUtil::block_to_jsonb(schema, block, static_cast<ColumnString&>(*col.get()),
+                                       block.columns(),
+                                       create_data_type_serdes(block.get_data_types()));
+    // deserialize
+    TupleDescriptor read_desc(PTupleDescriptor(), true);
+    // slot
+    TSlotDescriptor tslot;
+    tslot.__set_colName("m");
+    tslot.nullIndicatorBit = -1;
+    tslot.nullIndicatorByte = 0;
+    TypeDescriptor type_desc(TYPE_MAP);
+    type_desc.children.push_back(TypeDescriptor(TYPE_STRING));
+    type_desc.children.push_back(TypeDescriptor(TYPE_INT));
+    type_desc.contains_nulls.push_back(true);
+    type_desc.contains_nulls.push_back(true);
+    tslot.__set_col_unique_id(1);
+    tslot.__set_slotType(type_desc.to_thrift());
+    SlotDescriptor* slot = new SlotDescriptor(tslot);
+    read_desc.add_slot(slot);
+
+    Block new_block = block.clone_empty();
+    std::unordered_map<uint32_t, uint32_t> col_uid_to_idx;
+    std::vector<std::string> default_values;
+    default_values.resize(read_desc.slots().size());
+    for (int i = 0; i < read_desc.slots().size(); ++i) {
+        col_uid_to_idx[read_desc.slots()[i]->col_unique_id()] = i;
+        default_values[i] = read_desc.slots()[i]->col_default_value();
+        std::cout << "uid " << read_desc.slots()[i]->col_unique_id() << ":" << i << std::endl;
+    }
+    std::cout << block.dump_data() << std::endl;
+    std::cout << new_block.dump_data() << std::endl;
+    std::cout << "deserialize from jsonb" << std::endl;
+    JsonbSerializeUtil::jsonb_to_block(create_data_type_serdes(read_desc.slots()),
+                                       static_cast<ColumnString&>(*col.get()), col_uid_to_idx,
+                                       new_block, default_values);
+    std::cout << block.dump_data() << std::endl;
+    std::cout << new_block.dump_data() << std::endl;
+    EXPECT_EQ(block.dump_data(), new_block.dump_data());
+}
+
+TEST(BlockSerializeTest, Struct) {
+    TabletSchema schema;
+    TabletColumn struct_col;
+    struct_col.set_name("struct");
+    struct_col.set_unique_id(1);
+    struct_col.set_type(FieldType::OLAP_FIELD_TYPE_STRUCT);
+    schema.append_column(struct_col);
+    vectorized::Block block;
+    {
+        DataTypePtr s = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>());
+        DataTypePtr d = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeInt128>());
+        DataTypePtr m = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeUInt8>());
+        DataTypePtr st = std::make_shared<DataTypeStruct>(std::vector<DataTypePtr> {s, d, m});
+        Tuple t1, t2;
+        t1.push_back(String("amory cute"));
+        t1.push_back(__int128_t(37));
+        t1.push_back(true);
+        t2.push_back("null");
+        t2.push_back(__int128_t(26));
+        t2.push_back(false);
+        MutableColumnPtr struct_column = st->create_column();
+        struct_column->reserve(2);
+        struct_column->insert(t1);
+        struct_column->insert(t2);
+        vectorized::ColumnWithTypeAndName type_and_name(struct_column->get_ptr(), st,
+                                                        "test_struct");
+        block.insert(type_and_name);
+    }
+
+    MutableColumnPtr col = ColumnString::create();
+    // serialize
+    std::cout << "serialize to jsonb" << std::endl;
+    JsonbSerializeUtil::block_to_jsonb(schema, block, static_cast<ColumnString&>(*col.get()),
+                                       block.columns(),
+                                       create_data_type_serdes(block.get_data_types()));
+    // deserialize
+    TupleDescriptor read_desc(PTupleDescriptor(), true);
+    // slot
+    TSlotDescriptor tslot;
+    tslot.__set_colName("struct");
+    tslot.nullIndicatorBit = -1;
+    tslot.nullIndicatorByte = 0;
+    TypeDescriptor type_desc(TYPE_STRUCT);
+    type_desc.add_sub_type(TYPE_STRING, "name", true);
+    type_desc.add_sub_type(TYPE_LARGEINT, "age", true);
+    type_desc.add_sub_type(TYPE_BOOLEAN, "is", true);
+    tslot.__set_col_unique_id(1);
+    tslot.__set_slotType(type_desc.to_thrift());
+    SlotDescriptor* slot = new SlotDescriptor(tslot);
+    read_desc.add_slot(slot);
+
+    Block new_block = block.clone_empty();
+    std::unordered_map<uint32_t, uint32_t> col_uid_to_idx;
+    std::vector<std::string> default_values;
+    default_values.resize(read_desc.slots().size());
+    for (int i = 0; i < read_desc.slots().size(); ++i) {
+        col_uid_to_idx[read_desc.slots()[i]->col_unique_id()] = i;
+        default_values[i] = read_desc.slots()[i]->col_default_value();
+        std::cout << "uid " << read_desc.slots()[i]->col_unique_id() << ":" << i << std::endl;
+    }
+    std::cout << block.dump_data() << std::endl;
+    std::cout << new_block.dump_data() << std::endl;
+    std::cout << "deserialize from jsonb" << std::endl;
+    JsonbSerializeUtil::jsonb_to_block(create_data_type_serdes(read_desc.slots()),
+                                       static_cast<ColumnString&>(*col.get()), col_uid_to_idx,
+                                       new_block, default_values);
+    std::cout << block.dump_data() << std::endl;
+    std::cout << new_block.dump_data() << std::endl;
+    EXPECT_EQ(block.dump_data(), new_block.dump_data());
+}
+
 TEST(BlockSerializeTest, JsonbBlock) {
     vectorized::Block block;
     TabletSchema schema;
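
For orientation: the Map and Struct tests added above follow the same round-trip shape as the existing Array and JsonbBlock tests, i.e. build a Block holding one complex-typed column, serialize it with JsonbSerializeUtil::block_to_jsonb into a ColumnString, rebuild an empty clone with JsonbSerializeUtil::jsonb_to_block, and require the dumps to match. The skeleton below only restates that flow with the identifiers already visible in the diff; the per-type setup of block, schema, read_desc, col_uid_to_idx and default_values is exactly as in the tests above, so this is a reading aid, not an additional standalone test.

// Shared skeleton of the new tests (setup of block/schema/read_desc/
// col_uid_to_idx/default_values elided -- see the tests above).
MutableColumnPtr col = ColumnString::create();
JsonbSerializeUtil::block_to_jsonb(schema, block, static_cast<ColumnString&>(*col.get()),
                                   block.columns(),
                                   create_data_type_serdes(block.get_data_types()));
Block new_block = block.clone_empty();
JsonbSerializeUtil::jsonb_to_block(create_data_type_serdes(read_desc.slots()),
                                   static_cast<ColumnString&>(*col.get()), col_uid_to_idx,
                                   new_block, default_values);
EXPECT_EQ(block.dump_data(), new_block.dump_data());
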
diff --git a/regression-test/data/datatype_p0/nested_types/query/test_nested_type_with_rowstore.out b/regression-test/data/datatype_p0/nested_types/query/test_nested_type_with_rowstore.out
new file mode 100644
index 00000000000..29cebe3421d
--- /dev/null
+++ b/regression-test/data/datatype_p0/nested_types/query/test_nested_type_with_rowstore.out
@@ -0,0 +1,15 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !sql --
+1      doris1  {"jsonk1":123,"jsonk2":456}     [100, 200]      {"k1":10}       {"a": 1, "b": 2}
+2      doris2  {"jsonk3":333,"jsonk4":444}     [300, 400]      {"k2":20}       {"a": 3, "b": 4}
+
+-- !sql --
+1      doris1  {"jsonk1":123,"jsonk2":456}     [100, 200]      {"k1":10}       {"a": 1, "b": 2}
+
+-- !sql --
+1      apache doris    {"jsonk1":123,"jsonk2":456}     [100, 200]      {"k1":10}       {"a": 1, "b": 2}
+2      apache doris 2.0        {"jsonk3":333,"jsonk4":444}     [300, 400]      {"k2":20}       {"a": 3, "b": 4}
+
+-- !sql --
+1      apache doris    {"jsonk1":123,"jsonk2":456}     [100, 200]      {"k1":10}       {"a": 1, "b": 2}
+
diff --git a/regression-test/data/datatype_p0/nested_types/query/varchar.tsv b/regression-test/data/datatype_p0/nested_types/query/varchar.tsv
new file mode 100644
index 00000000000..d7f794dd4ac
--- /dev/null
+++ b/regression-test/data/datatype_p0/nested_types/query/varchar.tsv
@@ -0,0 +1,2 @@
+1      apache doris
+2      apache doris 2.0
diff --git a/regression-test/suites/datatype_p0/nested_types/query/test_nested_type_with_rowstore.groovy b/regression-test/suites/datatype_p0/nested_types/query/test_nested_type_with_rowstore.groovy
new file mode 100644
index 00000000000..4f8d0e741eb
--- /dev/null
+++ b/regression-test/suites/datatype_p0/nested_types/query/test_nested_type_with_rowstore.groovy
@@ -0,0 +1,58 @@
+import org.apache.commons.lang3.StringUtils
+
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_nested_type_with_rowstore") {
+    // this test case aims to test nested types with the old planner
+    sql """set enable_nereids_planner=false"""
+    sql """ DROP TABLE IF EXISTS ct_table;"""
+    sql """CREATE TABLE ct_table ( `id` int(11) NOT NULL COMMENT "用户 ID", 
`c_varchar` varchar(65533) NULL COMMENT "用户姓名", `c_jsonb` JSONB NULL, `c_array` 
ARRAY<INT> NULL, `c_map` MAP<STRING, INT> NULL, `c_struct` STRUCT<a:INT, b:INT> 
NULL) UNIQUE KEY(`id`) DISTRIBUTED BY HASH(`id`) BUCKETS 1 
PROPERTIES("replication_num" = "1", "enable_unique_key_merge_on_write" = 
"true", "store_row_column" = "true");"""
+
+    sql """ insert into ct_table values(2, "doris2", '{"jsonk3": 333, 
"jsonk4": 444}', [300, 400], {"k2": 20}, {3, 4});"""
+    sql """ insert into ct_table values(1, "doris1", '{"jsonk1": 123, 
"jsonk2": 456}', [100, 200], {"k1": 10}, {1, 2});"""
+
+    qt_sql """ select * from ct_table order by id;"""
+    // point sql
+    qt_sql """ select * from ct_table where id = 1"""
+
+    // column refresh
+    streamLoad {
+            table "ct_table"
+            time 10000
+            set 'partial_columns', 'true'
+            set 'strict_mode', 'false'
+            set 'columns', 'id,c_varchar'
+            file 'varchar.tsv'
+
+            check { result, exception, startTime, endTime ->
+                if (exception != null) {
+                    throw exception
+                }
+                log.info("Stream load result: ${result}".toString())
+                def json = parseJson(result)
+                assertEquals("success", json.Status.toLowerCase())
+                assertEquals(2, json.NumberTotalRows)
+                assertEquals(0, json.NumberFilteredRows)
+            }
+    }
+
+    // select and check
+    qt_sql """ select * from ct_table order by id;"""
+    // point sql
+    qt_sql """ select * from ct_table where id = 1"""
+}

