This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new fa3c7d98c8b [fix](map) the implementation of ColumnMap::replicate was 
incorrect (#26647)
fa3c7d98c8b is described below

commit fa3c7d98c8b7002a4aac7250908d6ecacf3913d7
Author: Jerry Hu <mrh...@gmail.com>
AuthorDate: Mon Nov 13 12:17:14 2023 +0800

    [fix](map) the implementation of ColumnMap::replicate was incorrect 
(#26647)
---
 be/src/vec/columns/column_array.cpp                | 51 +++++++++-----------
 be/src/vec/columns/column_map.cpp                  | 22 ++++++---
 .../data/datatype_p0/complex_types/test_map.out    | 16 +++++++
 .../data/nereids_p0/datatype/test_map.out          | 16 +++++++
 .../datatype_p0/complex_types/test_map.groovy      | 54 +++++++++++++++++++++
 .../suites/nereids_p0/datatype/test_map.groovy     | 56 ++++++++++++++++++++++
 6 files changed, 180 insertions(+), 35 deletions(-)

diff --git a/be/src/vec/columns/column_array.cpp 
b/be/src/vec/columns/column_array.cpp
index 374dd17a88a..fa9e0486367 100644
--- a/be/src/vec/columns/column_array.cpp
+++ b/be/src/vec/columns/column_array.cpp
@@ -833,42 +833,35 @@ ColumnPtr ColumnArray::replicate(const IColumn::Offsets& 
replicate_offsets) cons
     return replicate_generic(replicate_offsets);
 }
 
-void ColumnArray::replicate(const uint32_t* indexs, size_t target_size, 
IColumn& column) const {
+void ColumnArray::replicate(const uint32_t* indices, size_t target_size, 
IColumn& column) const {
     if (target_size == 0) {
         return;
     }
-    auto total_size = get_offsets().size();
-    // 
|---------------------|-------------------------|-------------------------|
-    // [0, begin)             [begin, begin + count_sz)  [begin + count_sz, 
size())
-    //  do not need to copy    copy counts[n] times       do not need to copy
-    IColumn::Offsets replicate_offsets(total_size, 0);
-    // copy original data at offset n counts[n] times
-    auto begin = 0, end = 0;
-    while (begin < target_size) {
-        while (end < target_size && indexs[begin] == indexs[end]) {
-            end++;
-        }
-        long index = indexs[begin];
-        replicate_offsets[index] = end - begin;
-        begin = end;
-    }
 
-    // ignored
-    for (size_t i = 1; i < total_size; ++i) {
-        replicate_offsets[i] += replicate_offsets[i - 1];
-    }
+    auto& dst_col = assert_cast<ColumnArray&>(column);
+    auto& dst_data_col = dst_col.get_data();
+    auto& dst_offsets = dst_col.get_offsets();
+    dst_offsets.reserve(target_size);
 
-    auto rep_res = replicate(replicate_offsets);
-    if (!rep_res) {
-        LOG(WARNING) << "ColumnArray replicate failed, replicate_offsets 
count="
-                     << replicate_offsets.size() << ", max=" << 
replicate_offsets.back();
-        return;
+    PODArray<uint32> data_indices_to_replicate;
+
+    for (size_t i = 0; i < target_size; ++i) {
+        const auto index = indices[i];
+        const auto start = offset_at(index);
+        const auto length = size_at(index);
+        dst_offsets.push_back(dst_offsets.back() + length);
+        if (UNLIKELY(length == 0)) {
+            continue;
+        }
+
+        data_indices_to_replicate.reserve(data_indices_to_replicate.size() + 
length);
+        for (size_t j = start; j != start + length; ++j) {
+            data_indices_to_replicate.push_back(j);
+        }
     }
-    auto& rep_res_arr = typeid_cast<const ColumnArray&>(*rep_res);
 
-    ColumnArray& res_arr = typeid_cast<ColumnArray&>(column);
-    res_arr.data = rep_res_arr.get_data_ptr();
-    res_arr.offsets = rep_res_arr.get_offsets_ptr();
+    get_data().replicate(data_indices_to_replicate.data(), 
data_indices_to_replicate.size(),
+                         dst_data_col);
 }
 
 template <typename T>
diff --git a/be/src/vec/columns/column_map.cpp 
b/be/src/vec/columns/column_map.cpp
index 47a41c3dcfe..f3fa5ab6c26 100644
--- a/be/src/vec/columns/column_map.cpp
+++ b/be/src/vec/columns/column_map.cpp
@@ -433,14 +433,24 @@ ColumnPtr ColumnMap::replicate(const Offsets& offsets) 
const {
     return res;
 }
 
-void ColumnMap::replicate(const uint32_t* indexs, size_t target_size, IColumn& 
column) const {
+void ColumnMap::replicate(const uint32_t* indices, size_t target_size, 
IColumn& column) const {
     auto& res = reinterpret_cast<ColumnMap&>(column);
 
-    // Make a temp column array for reusing its replicate function
-    ColumnArray::create(keys_column->assume_mutable(), 
offsets_column->assume_mutable())
-            ->replicate(indexs, target_size, 
res.keys_column->assume_mutable_ref());
-    ColumnArray::create(values_column->assume_mutable(), 
offsets_column->assume_mutable())
-            ->replicate(indexs, target_size, 
res.values_column->assume_mutable_ref());
+    auto keys_array =
+            ColumnArray::create(keys_column->assume_mutable(), 
offsets_column->assume_mutable());
+
+    auto result_array = ColumnArray::create(res.keys_column->assume_mutable(),
+                                            
res.offsets_column->assume_mutable());
+    keys_array->replicate(indices, target_size, 
result_array->assume_mutable_ref());
+
+    result_array = ColumnArray::create(res.values_column->assume_mutable(),
+                                       res.offsets_column->clone_empty());
+
+    auto values_array =
+            ColumnArray::create(values_column->assume_mutable(), 
offsets_column->assume_mutable());
+
+    /// FIXME: To reuse the replicate of ColumnArray, the offsets column was 
replicated twice
+    values_array->replicate(indices, target_size, 
result_array->assume_mutable_ref());
 }
 
 MutableColumnPtr ColumnMap::get_shrinked_column() {
diff --git a/regression-test/data/datatype_p0/complex_types/test_map.out 
b/regression-test/data/datatype_p0/complex_types/test_map.out
new file mode 100644
index 00000000000..00a74e74af0
--- /dev/null
+++ b/regression-test/data/datatype_p0/complex_types/test_map.out
@@ -0,0 +1,16 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !sql --
+1      {"key1":"value1"}       1       1
+1      {"key1_1":"value1_1"}   1       1
+1      {"key1":"value1"}       2       1
+1      {"key1_1":"value1_1"}   2       1
+2      {"key2":"value2", "key22":"value22"}    3       2
+2      {"key2_1":"value2_1", "key22_1":"value22_1"}    3       2
+2      {"key2_2":"value2_2", "key22_2":"value22_2"}    3       2
+2      {"key2":"value2", "key22":"value22"}    4       2
+2      {"key2_1":"value2_1", "key22_1":"value22_1"}    4       2
+2      {"key2_2":"value2_2", "key22_2":"value22_2"}    4       2
+3      {"key3":"value3", "key33":"value33", "key3333":"value333"}      5       
3
+3      {"key3":"value3", "key33":"value33", "key3333":"value333"}      6       
3
+4      {"key4":"value4", "key44":"value44", "key444":"value444", 
"key4444":"value4444"}        \N      \N
+
diff --git a/regression-test/data/nereids_p0/datatype/test_map.out 
b/regression-test/data/nereids_p0/datatype/test_map.out
new file mode 100644
index 00000000000..00a74e74af0
--- /dev/null
+++ b/regression-test/data/nereids_p0/datatype/test_map.out
@@ -0,0 +1,16 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !sql --
+1      {"key1":"value1"}       1       1
+1      {"key1_1":"value1_1"}   1       1
+1      {"key1":"value1"}       2       1
+1      {"key1_1":"value1_1"}   2       1
+2      {"key2":"value2", "key22":"value22"}    3       2
+2      {"key2_1":"value2_1", "key22_1":"value22_1"}    3       2
+2      {"key2_2":"value2_2", "key22_2":"value22_2"}    3       2
+2      {"key2":"value2", "key22":"value22"}    4       2
+2      {"key2_1":"value2_1", "key22_1":"value22_1"}    4       2
+2      {"key2_2":"value2_2", "key22_2":"value22_2"}    4       2
+3      {"key3":"value3", "key33":"value33", "key3333":"value333"}      5       
3
+3      {"key3":"value3", "key33":"value33", "key3333":"value333"}      6       
3
+4      {"key4":"value4", "key44":"value44", "key444":"value444", 
"key4444":"value4444"}        \N      \N
+
diff --git a/regression-test/suites/datatype_p0/complex_types/test_map.groovy 
b/regression-test/suites/datatype_p0/complex_types/test_map.groovy
new file mode 100644
index 00000000000..6a3d28c5ed0
--- /dev/null
+++ b/regression-test/suites/datatype_p0/complex_types/test_map.groovy
@@ -0,0 +1,54 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_map") {
+    sql 'set enable_nereids_planner=false'
+    sql "DROP TABLE IF EXISTS `test_map_table`"
+    sql """
+        create table `test_map_table` (
+            `k1` int,
+            `value` map<text, text>
+        ) distributed by hash(`k1`) buckets 1 properties("replication_num" = 
"1");
+    """
+
+    sql 'insert into `test_map_table` values (1, {"key1": "value1"});'
+    sql 'insert into `test_map_table` values (1, {"key1_1": "value1_1"});'
+    sql 'insert into `test_map_table` values (2, {"key2": "value2", "key22": 
"value22"});'
+    sql 'insert into `test_map_table` values (2, {"key2_1": "value2_1", 
"key22_1": "value22_1"});'
+    sql 'insert into `test_map_table` values (2, {"key2_2": "value2_2", 
"key22_2": "value22_2"});'
+    sql 'insert into `test_map_table` values (3, {"key3": "value3", "key33": 
"value33", "key3333": "value333"});'
+    sql 'insert into `test_map_table` values (4, {"key4": "value4", "key44": 
"value44", "key444": "value444", "key4444": "value4444"});'
+
+    sql "DROP TABLE IF EXISTS `test_map_table_right`"
+    sql """
+        create table `test_map_table_right` (
+            `id` int,
+            `value` int
+        ) distributed by hash(`id`) buckets 1 properties("replication_num" = 
"1");
+    """
+
+    sql 'insert into `test_map_table_right` values(1, 1);'
+    sql 'insert into `test_map_table_right` values(2, 1);'
+    sql 'insert into `test_map_table_right` values(3, 2);'
+    sql 'insert into `test_map_table_right` values(4, 2);'
+    sql 'insert into `test_map_table_right` values(5, 3);'
+    sql 'insert into `test_map_table_right` values(6, 3);'
+
+    qt_sql """
+        select * from test_map_table left join test_map_table_right on 
test_map_table.k1 = test_map_table_right.value order by 1,3;
+    """
+}
diff --git a/regression-test/suites/nereids_p0/datatype/test_map.groovy 
b/regression-test/suites/nereids_p0/datatype/test_map.groovy
new file mode 100644
index 00000000000..54f41e421f3
--- /dev/null
+++ b/regression-test/suites/nereids_p0/datatype/test_map.groovy
@@ -0,0 +1,56 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_map") {
+    sql 'set enable_nereids_planner=true'
+    sql 'set enable_fallback_to_original_planner=false'
+
+    sql "DROP TABLE IF EXISTS `test_map_table`"
+    sql """
+        create table `test_map_table` (
+            `k1` int,
+            `value` map<text, text>
+        ) distributed by hash(`k1`) buckets 1 properties("replication_num" = 
"1");
+    """
+
+    sql 'insert into `test_map_table` values (1, {"key1": "value1"});'
+    sql 'insert into `test_map_table` values (1, {"key1_1": "value1_1"});'
+    sql 'insert into `test_map_table` values (2, {"key2": "value2", "key22": 
"value22"});'
+    sql 'insert into `test_map_table` values (2, {"key2_1": "value2_1", 
"key22_1": "value22_1"});'
+    sql 'insert into `test_map_table` values (2, {"key2_2": "value2_2", 
"key22_2": "value22_2"});'
+    sql 'insert into `test_map_table` values (3, {"key3": "value3", "key33": 
"value33", "key3333": "value333"});'
+    sql 'insert into `test_map_table` values (4, {"key4": "value4", "key44": 
"value44", "key444": "value444", "key4444": "value4444"});'
+
+    sql "DROP TABLE IF EXISTS `test_map_table_right`"
+    sql """
+        create table `test_map_table_right` (
+            `id` int,
+            `value` int
+        ) distributed by hash(`id`) buckets 1 properties("replication_num" = 
"1");
+    """
+
+    sql 'insert into `test_map_table_right` values(1, 1);'
+    sql 'insert into `test_map_table_right` values(2, 1);'
+    sql 'insert into `test_map_table_right` values(3, 2);'
+    sql 'insert into `test_map_table_right` values(4, 2);'
+    sql 'insert into `test_map_table_right` values(5, 3);'
+    sql 'insert into `test_map_table_right` values(6, 3);'
+
+    qt_sql """
+        select * from test_map_table left join test_map_table_right on 
test_map_table.k1 = test_map_table_right.value order by 1,3;
+    """
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to