[doris] branch master updated: [Feature](map-type) Support stream load and fix some bugs for map type (#16776)

xuyang Sat, 18 Feb 2023 23:12:12 -0800

This is an automated email from the ASF dual-hosted git repository.

xuyang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git



The following commit(s) were added to refs/heads/master by this push:
     new 8b70bfdc31 [Feature](map-type) Support stream load and fix some bugs 
for map type (#16776)
8b70bfdc31 is described below

commit 8b70bfdc31141332cbb72008083a423abcb7e741
Author: amory <amorywang...@gmail.com>
AuthorDate: Sun Feb 19 15:11:54 2023 +0800

    [Feature](map-type) Support stream load and fix some bugs for map type 
(#16776)
    
    1. Support stream load in JSON and CSV formats for the map type
    2. Fix the OLAP convertor for compaction on map columns that contain nulls
    3. Support SELECT ... INTO OUTFILE for the map type
    4. Add regression tests
---
 be/src/olap/rowset/segment_v2/column_writer.cpp    |  13 +-
 be/src/olap/rowset/segment_v2/column_writer.h      |   2 +-
 be/src/vec/data_types/data_type_factory.cpp        |   5 +-
 be/src/vec/data_types/data_type_factory.hpp        |   3 +-
 be/src/vec/data_types/data_type_map.cpp            | 171 +++++++++++++++------
 be/src/vec/exprs/vexpr.cpp                         |   9 ++
 be/src/vec/functions/function_cast.h               |  12 ++
 be/src/vec/olap/olap_data_convertor.h              |   1 -
 be/src/vec/runtime/vfile_result_writer.cpp         |   4 +
 be/src/vec/sink/vmysql_result_writer.cpp           |   6 +
 regression-test/data/export/test_map_export.out    |   8 +
 regression-test/data/load/insert/test_map_dml.out  |   8 +
 .../data/load_p0/stream_load/test_map.csv          |  15 ++
 .../stream_load/test_map_load_and_function.out     |  37 +++++
 regression-test/data/map_p0/test_map_dml.out       |  11 ++
 .../data/query_p0/show/test_map_show_create.out    |   4 +
 .../suites/export/test_map_export.groovy           | 131 ++++++++++++++++
 .../suites/load/insert/test_map_dml.groovy         | 114 ++++++++++++++
 .../stream_load/test_map_load_and_function.groovy  |  74 +++++++++
 .../query_p0/show/test_map_show_create.groovy      |  69 +++++++++
 20 files changed, 643 insertions(+), 54 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/column_writer.cpp 
b/be/src/olap/rowset/segment_v2/column_writer.cpp
index 583becb2b0..a35c840eb5 100644
--- a/be/src/olap/rowset/segment_v2/column_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/column_writer.cpp
@@ -1000,7 +1000,15 @@ Status MapColumnWriter::finish() {
     return Status::OK();
 }
 
-// todo. make keys and values write
+Status MapColumnWriter::append_nullable(const uint8_t* null_map, const 
uint8_t** ptr,
+                                        size_t num_rows) {
+    if (is_nullable()) {
+        RETURN_IF_ERROR(_null_writer->append_data(&null_map, num_rows));
+    }
+    RETURN_IF_ERROR(append_data(ptr, num_rows));
+    return Status::OK();
+}
+
 Status MapColumnWriter::append_data(const uint8_t** ptr, size_t num_rows) {
     auto kv_ptr = reinterpret_cast<const uint64_t*>(*ptr);
     for (size_t i = 0; i < 2; ++i) {
@@ -1008,9 +1016,6 @@ Status MapColumnWriter::append_data(const uint8_t** ptr, 
size_t num_rows) {
         const uint8_t* val_ptr = (const uint8_t*)data;
         RETURN_IF_ERROR(_kv_writers[i]->append_data(&val_ptr, num_rows));
     }
-    if (is_nullable()) {
-        return write_null_column(num_rows, false);
-    }
     return Status::OK();
 }
 
diff --git a/be/src/olap/rowset/segment_v2/column_writer.h 
b/be/src/olap/rowset/segment_v2/column_writer.h
index 7d140324dd..036c4d3baf 100644
--- a/be/src/olap/rowset/segment_v2/column_writer.h
+++ b/be/src/olap/rowset/segment_v2/column_writer.h
@@ -382,7 +382,7 @@ public:
     Status init() override;
 
     Status append_data(const uint8_t** ptr, size_t num_rows) override;
-
+    Status append_nullable(const uint8_t* null_map, const uint8_t** ptr, 
size_t num_rows) override;
     uint64_t estimate_buffer_size() override;
 
     Status finish() override;
diff --git a/be/src/vec/data_types/data_type_factory.cpp 
b/be/src/vec/data_types/data_type_factory.cpp
index 65510dd9f2..91fc51187b 100644
--- a/be/src/vec/data_types/data_type_factory.cpp
+++ b/be/src/vec/data_types/data_type_factory.cpp
@@ -169,9 +169,10 @@ DataTypePtr DataTypeFactory::create_data_type(const 
TypeDescriptor& col_desc, bo
         break;
     case TYPE_MAP:
         DCHECK(col_desc.children.size() == 2);
+        // TODO(Amory): support Map contains_nulls in FE MapType.java in a later PR.
         nested = std::make_shared<vectorized::DataTypeMap>(
-                create_data_type(col_desc.children[0], 
col_desc.contains_nulls[0]),
-                create_data_type(col_desc.children[1], 
col_desc.contains_nulls[1]));
+                create_data_type(col_desc.children[0], true),
+                create_data_type(col_desc.children[1], true));
         break;
     case TYPE_STRUCT: {
         DCHECK(col_desc.children.size() >= 1);
diff --git a/be/src/vec/data_types/data_type_factory.hpp 
b/be/src/vec/data_types/data_type_factory.hpp
index 879418a326..b2623b06da 100644
--- a/be/src/vec/data_types/data_type_factory.hpp
+++ b/be/src/vec/data_types/data_type_factory.hpp
@@ -113,7 +113,8 @@ public:
                 return entity.second;
             }
         }
-        if (type_ptr->get_type_id() == TypeIndex::Struct) {
+        if (type_ptr->get_type_id() == TypeIndex::Struct ||
+            type_ptr->get_type_id() == TypeIndex::Map) {
             
DataTypeFactory::instance().register_data_type(type_ptr->get_name(), type_ptr);
             for (const auto& entity : _invert_data_type_map) {
                 if (entity.first->equals(*type_ptr)) {
diff --git a/be/src/vec/data_types/data_type_map.cpp 
b/be/src/vec/data_types/data_type_map.cpp
index c40e0362c5..daf683c3ab 100644
--- a/be/src/vec/data_types/data_type_map.cpp
+++ b/be/src/vec/data_types/data_type_map.cpp
@@ -20,14 +20,16 @@
 #include "gen_cpp/data.pb.h"
 #include "vec/columns/column_array.h"
 #include "vec/columns/column_map.h"
+#include "vec/columns/column_nullable.h"
 #include "vec/common/assert_cast.h"
-#include "vec/data_types/data_type_factory.hpp"
+#include "vec/data_types/data_type_array.h"
+#include "vec/data_types/data_type_nullable.h"
 
 namespace doris::vectorized {
 
 DataTypeMap::DataTypeMap(const DataTypePtr& keys_, const DataTypePtr& values_) 
{
-    key_type = keys_;
-    value_type = values_;
+    key_type = make_nullable(keys_);
+    value_type = make_nullable(values_);
 
     keys = std::make_shared<DataTypeArray>(key_type);
     values = std::make_shared<DataTypeArray>(value_type);
@@ -53,7 +55,7 @@ std::string DataTypeMap::to_string(const IColumn& column, 
size_t row_num) const
             ss << ", ";
         }
         if (nested_keys_column.is_null_at(i)) {
-            ss << "NULL";
+            ss << "null";
         } else if 
(WhichDataType(remove_nullable(key_type)).is_string_or_fixed_string()) {
             ss << "'" << key_type->to_string(nested_keys_column, i) << "'";
         } else {
@@ -61,7 +63,7 @@ std::string DataTypeMap::to_string(const IColumn& column, 
size_t row_num) const
         }
         ss << ":";
         if (nested_values_column.is_null_at(i)) {
-            ss << "NULL";
+            ss << "null";
         } else if 
(WhichDataType(remove_nullable(value_type)).is_string_or_fixed_string()) {
             ss << "'" << value_type->to_string(nested_values_column, i) << "'";
         } else {
@@ -78,6 +80,85 @@ void DataTypeMap::to_string(const class 
doris::vectorized::IColumn& column, size
     ostr.write(ss.c_str(), strlen(ss.c_str()));
 }
 
+bool next_slot_from_string(ReadBuffer& rb, StringRef& output, bool& has_quota) 
{
+    StringRef element(rb.position(), 0);
+    has_quota = false;
+    if (rb.eof()) {
+        return false;
+    }
+
+    // ltrim
+    while (!rb.eof() && isspace(*rb.position())) {
+        ++rb.position();
+        element.data = rb.position();
+    }
+
+    // parse string
+    if (*rb.position() == '"' || *rb.position() == '\'') {
+        const char str_sep = *rb.position();
+        size_t str_len = 1;
+        // search until next '"' or '\''
+        while (str_len < rb.count() && *(rb.position() + str_len) != str_sep) {
+            ++str_len;
+        }
+        // invalid string
+        if (str_len >= rb.count()) {
+            rb.position() = rb.end();
+            return false;
+        }
+        has_quota = true;
+        rb.position() += str_len + 1;
+        element.size += str_len + 1;
+    }
+
+    // parse the map key/value slot until the separator ':' or ',' or the end '}'
+    while (!rb.eof() && (*rb.position() != ':') && (*rb.position() != ',') &&
+           (rb.count() != 1 || *rb.position() != '}')) {
+        if (has_quota && !isspace(*rb.position())) {
+            return false;
+        }
+        ++rb.position();
+        ++element.size;
+    }
+    // invalid map element
+    if (rb.eof()) {
+        return false;
+    }
+    // adjust read buffer position to first char of the next map key/value slot
+    ++rb.position();
+
+    // rtrim
+    while (element.size > 0 && isspace(element.data[element.size - 1])) {
+        --element.size;
+    }
+
+    // trim '"' and '\'' for string
+    if (element.size >= 2 && (element.data[0] == '"' || element.data[0] == 
'\'') &&
+        element.data[0] == element.data[element.size - 1]) {
+        ++element.data;
+        element.size -= 2;
+    }
+    output = element;
+    return true;
+}
+
+bool is_empty_null_element(StringRef element, IColumn* nested_column, bool 
has_quota) {
+    auto& nested_null_col = reinterpret_cast<ColumnNullable&>(*nested_column);
+    // handle empty element
+    if (element.size == 0) {
+        nested_null_col.get_nested_column().insert_default();
+        nested_null_col.get_null_map_data().push_back(0);
+        return true;
+    }
+
+    // handle null element
+    if (!has_quota && element.size == 4 && strncmp(element.data, "null", 4) == 
0) {
+        nested_null_col.get_nested_column().insert_default();
+        nested_null_col.get_null_map_data().push_back(1);
+        return true;
+    }
+    return false;
+}
 Status DataTypeMap::from_string(ReadBuffer& rb, IColumn* column) const {
     DCHECK(!rb.eof());
     auto* map_column = assert_cast<ColumnMap*>(column);
@@ -91,57 +172,57 @@ Status DataTypeMap::from_string(ReadBuffer& rb, IColumn* 
column) const {
                                        *(rb.end() - 1));
     }
 
-    std::stringstream keyCharset;
-    std::stringstream valCharset;
-
     if (rb.count() == 2) {
         // empty map {} , need to make empty array to add offset
-        keyCharset << "[]";
-        valCharset << "[]";
+        map_column->insert_default();
     } else {
-        // {"aaa": 1, "bbb": 20}, need to handle key and value to make key 
column arr and value arr
+        // {"aaa": 1, "bbb": 20}, need to handle key slot and value slot to 
make key column arr and value arr
         // skip "{"
         ++rb.position();
-        keyCharset << "[";
-        valCharset << "[";
+        auto& keys_arr = 
reinterpret_cast<ColumnArray&>(map_column->get_keys());
+        ColumnArray::Offsets64& key_off = keys_arr.get_offsets();
+        auto& values_arr = 
reinterpret_cast<ColumnArray&>(map_column->get_values());
+        ColumnArray::Offsets64& val_off = values_arr.get_offsets();
+
+        IColumn& nested_key_column = keys_arr.get_data();
+        DCHECK(nested_key_column.is_nullable());
+        IColumn& nested_val_column = values_arr.get_data();
+        DCHECK(nested_val_column.is_nullable());
+
+        size_t element_num = 0;
         while (!rb.eof()) {
-            size_t kv_len = 0;
-            auto start = rb.position();
-            while (!rb.eof() && *start != ',' && *start != '}') {
-                kv_len++;
-                start++;
+            StringRef key_element(rb.position(), rb.count());
+            bool has_quota = false;
+            if (!next_slot_from_string(rb, key_element, has_quota)) {
+                return Status::InvalidArgument("Cannot read map key from text 
'{}'",
+                                               key_element.to_string());
             }
-            if (kv_len >= rb.count()) {
-                return Status::InvalidArgument("Invalid Length");
+            if (!is_empty_null_element(key_element, &nested_key_column, 
has_quota)) {
+                ReadBuffer krb(const_cast<char*>(key_element.data), 
key_element.size);
+                if (auto st = key_type->from_string(krb, &nested_key_column); 
!st.ok()) {
+                    map_column->pop_back(element_num);
+                    return st;
+                }
             }
 
-            size_t k_len = 0;
-            auto k_rb = rb.position();
-            while (kv_len > 0 && *k_rb != ':') {
-                k_len++;
-                k_rb++;
+            has_quota = false;
+            StringRef value_element(rb.position(), rb.count());
+            if (!next_slot_from_string(rb, value_element, has_quota)) {
+                return Status::InvalidArgument("Cannot read map value from 
text '{}'",
+                                               value_element.to_string());
             }
-            ReadBuffer key_rb(rb.position(), k_len);
-            ReadBuffer val_rb(k_rb + 1, kv_len - k_len - 1);
-
-            // handle key
-            keyCharset << key_rb.to_string();
-            keyCharset << ",";
-
-            // handle value
-            valCharset << val_rb.to_string();
-            valCharset << ",";
-
-            rb.position() += kv_len + 1;
+            if (!is_empty_null_element(value_element, &nested_val_column, 
has_quota)) {
+                ReadBuffer vrb(const_cast<char*>(value_element.data), 
value_element.size);
+                if (auto st = value_type->from_string(vrb, 
&nested_val_column); !st.ok()) {
+                    map_column->pop_back(element_num);
+                    return st;
+                }
+            }
+            ++element_num;
         }
-        keyCharset << ']';
-        valCharset << ']';
+        key_off.push_back(key_off.back() + element_num);
+        val_off.push_back(val_off.back() + element_num);
     }
-
-    ReadBuffer kb(keyCharset.str().data(), keyCharset.str().length());
-    ReadBuffer vb(valCharset.str().data(), valCharset.str().length());
-    keys->from_string(kb, &map_column->get_keys());
-    values->from_string(vb, &map_column->get_values());
     return Status::OK();
 }
 
@@ -199,4 +280,4 @@ const char* DataTypeMap::deserialize(const char* buf, 
IColumn* column, int data_
                                      data_version);
 }
 
-} // namespace doris::vectorized
+} // namespace doris::vectorized
\ No newline at end of file
diff --git a/be/src/vec/exprs/vexpr.cpp b/be/src/vec/exprs/vexpr.cpp
index 69ccf0f5bd..f90993884d 100644
--- a/be/src/vec/exprs/vexpr.cpp
+++ b/be/src/vec/exprs/vexpr.cpp
@@ -372,6 +372,15 @@ FunctionContext::TypeDesc 
VExpr::column_type_to_type_desc(const TypeDescriptor&
             out.children.push_back(VExpr::column_type_to_type_desc(t));
         }
         break;
+    case TYPE_MAP:
+        CHECK(type.children.size() == 2);
+        // only support map key is scalar
+        CHECK(!type.children[0].is_complex_type());
+        out.type = FunctionContext::TYPE_MAP;
+        for (const auto& t : type.children) {
+            out.children.push_back(VExpr::column_type_to_type_desc(t));
+        }
+        break;
     case TYPE_STRING:
         out.type = FunctionContext::TYPE_STRING;
         out.len = type.len;
diff --git a/be/src/vec/functions/function_cast.h 
b/be/src/vec/functions/function_cast.h
index c8dc4282df..7ac3316fdb 100644
--- a/be/src/vec/functions/function_cast.h
+++ b/be/src/vec/functions/function_cast.h
@@ -1541,6 +1541,16 @@ private:
             return &ConvertImplGenericToJsonb::execute;
         }
     }
+
+    WrapperType create_map_wrapper(const DataTypePtr& from_type, const 
DataTypeMap& to_type) const {
+        switch (from_type->get_type_id()) {
+        case TypeIndex::String:
+            return &ConvertImplGenericFromString<ColumnString>::execute;
+        default:
+            return create_unsupport_wrapper(from_type->get_name(), 
to_type.get_name());
+        }
+    }
+
     // check struct value type and get to_type value
     // TODO: need handle another type to cast struct
     WrapperType create_struct_wrapper(const DataTypePtr& from_type,
@@ -1727,6 +1737,8 @@ private:
                                         static_cast<const 
DataTypeArray&>(*to_type));
         case TypeIndex::Struct:
             return create_struct_wrapper(from_type, static_cast<const 
DataTypeStruct&>(*to_type));
+        case TypeIndex::Map:
+            return create_map_wrapper(from_type, static_cast<const 
DataTypeMap&>(*to_type));
         default:
             break;
         }
diff --git a/be/src/vec/olap/olap_data_convertor.h 
b/be/src/vec/olap/olap_data_convertor.h
index 0efd5c1bd8..b23ba5ee5f 100644
--- a/be/src/vec/olap/olap_data_convertor.h
+++ b/be/src/vec/olap/olap_data_convertor.h
@@ -413,7 +413,6 @@ private:
 
         Status convert_to_olap() override;
         const void* get_data() const override { return _results.data(); };
-
         const void* get_data_at(size_t offset) const override {
             LOG(FATAL) << "now not support get_data_at for 
OlapColumnDataConvertorMap";
         };
diff --git a/be/src/vec/runtime/vfile_result_writer.cpp 
b/be/src/vec/runtime/vfile_result_writer.cpp
index 2782008b34..16d48bde08 100644
--- a/be/src/vec/runtime/vfile_result_writer.cpp
+++ b/be/src/vec/runtime/vfile_result_writer.cpp
@@ -342,6 +342,10 @@ Status VFileResultWriter::_write_csv_file(const Block& 
block) {
                     _plain_text_outstream << col.type->to_string(*col.column, 
i);
                     break;
                 }
+                case TYPE_MAP: {
+                    _plain_text_outstream << col.type->to_string(*col.column, 
i);
+                    break;
+                }
                 default: {
                     // not supported type, like BITMAP, just export null
                     _plain_text_outstream << NULL_IN_CSV;
diff --git a/be/src/vec/sink/vmysql_result_writer.cpp 
b/be/src/vec/sink/vmysql_result_writer.cpp
index f74c3f94a4..bade808912 100644
--- a/be/src/vec/sink/vmysql_result_writer.cpp
+++ b/be/src/vec/sink/vmysql_result_writer.cpp
@@ -195,6 +195,12 @@ Status 
VMysqlResultWriter<is_binary_format>::_add_one_column(
                 return Status::InternalError("pack mysql buffer failed.");
             }
 
+            if constexpr (is_nullable) {
+                if (column_ptr->is_null_at(i)) {
+                    buf_ret = rows_buffer[i].push_null();
+                    continue;
+                }
+            }
             rows_buffer[i].open_dynamic_mode();
             std::string cell_str = map_type.to_string(*column, i);
             buf_ret = rows_buffer[i].push_string(cell_str.c_str(), 
strlen(cell_str.c_str()));
diff --git a/regression-test/data/export/test_map_export.out 
b/regression-test/data/export/test_map_export.out
new file mode 100644
index 0000000000..0e0cfc3f7c
--- /dev/null
+++ b/regression-test/data/export/test_map_export.out
@@ -0,0 +1,8 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !select --
+1      \N
+2      {}
+3      {'  33,amory  ':2, ' bet ':20, ' cler ':26}
+4      {'k3':23, null:20, 'k4':null}
+5      {null:null}
+
diff --git a/regression-test/data/load/insert/test_map_dml.out 
b/regression-test/data/load/insert/test_map_dml.out
new file mode 100644
index 0000000000..446b0a9c59
--- /dev/null
+++ b/regression-test/data/load/insert/test_map_dml.out
@@ -0,0 +1,8 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !select --
+1      {' amory ':6, 'happy':38}
+6      {'amory':6, 'is':38, 'cl':0}
+
+-- !select --
+100    {1:'1', 2:'2', 3:'3'}   {32767:'32767', 32768:'32768', 32769:'32769'}   
[65534, 65535, 65536]   {2022-07-13:1}  {2022-07-13 12:30:00:'2022-07-13 
12:30:00'}     {0.33:33, 0.67:67}
+
diff --git a/regression-test/data/load_p0/stream_load/test_map.csv 
b/regression-test/data/load_p0/stream_load/test_map.csv
new file mode 100644
index 0000000000..cbace425cc
--- /dev/null
+++ b/regression-test/data/load_p0/stream_load/test_map.csv
@@ -0,0 +1,15 @@
+1      \N
+2      {"  11amory  ":23, "beat":20, " clever ": 66}
+3      {"k1": 31, "k2": 300}
+4      {}
+5      \N
+6      {"k1":41, "k2": 400}
+7      {"  33,amory  ":2, " bet ":20, " cler ": 26}
+8      {}
+9      {'  1,amy  ':2, " k2 ":90, " k7 ": 33}
+10     {}
+11     {"k1': 4, "k2": 400}
+12     {"k3":23, null: 20, "k4": null}
+13     {"null":1}
+15     {:2, "k2":}
+16     {null:null}
diff --git 
a/regression-test/data/load_p0/stream_load/test_map_load_and_function.out 
b/regression-test/data/load_p0/stream_load/test_map_load_and_function.out
new file mode 100644
index 0000000000..1b7eb1f5a3
--- /dev/null
+++ b/regression-test/data/load_p0/stream_load/test_map_load_and_function.out
@@ -0,0 +1,37 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !select --
+1      \N
+2      {'  11amory  ':23, 'beat':20, ' clever ':66}
+3      {'k1':31, 'k2':300}
+4      {}
+5      \N
+6      {'k1':41, 'k2':400}
+7      {'  33,amory  ':2, ' bet ':20, ' cler ':26}
+8      {}
+9      {'  1,amy  ':2, ' k2 ':90, ' k7 ':33}
+10     {}
+11     \N
+12     {'k3':23, null:20, 'k4':null}
+13     {'null':1}
+15     {'':2, 'k2':0}
+16     {null:null}
+
+-- !select --
+\N
+\N
+300
+\N
+\N
+400
+\N
+\N
+\N
+\N
+\N
+\N
+\N
+\N
+130
+0
+\N
+
diff --git a/regression-test/data/map_p0/test_map_dml.out 
b/regression-test/data/map_p0/test_map_dml.out
new file mode 100644
index 0000000000..531ce9ac2e
--- /dev/null
+++ b/regression-test/data/map_p0/test_map_dml.out
@@ -0,0 +1,11 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !select --
+1      {' amory ':6, 'happy':38}
+6      {'amory':6, 'is':38, 'cl':0}
+
+-- !test --
+100    {1:'1', 2:'2', 3:'3'}   {32767:'32767', 32768:'32768', 32769:'32769'}   
[65534, 65535, 65536]   {2022-07-13:1}  {2022-07-13 12:30:00:'2022-07-13 
12:30:00'}     {0.33:33, 0.67:67}
+
+-- !select --
+100    {1:'1', 2:'2', 3:'3'}   {32767:'32767', 32768:'32768', 32769:'32769'}   
[65534, 65535, 65536]   {2022-07-13:1}  {2022-07-13 12:30:00:'2022-07-13 
12:30:00'}     {0.33:33, 0.67:67}
+
diff --git a/regression-test/data/query_p0/show/test_map_show_create.out 
b/regression-test/data/query_p0/show/test_map_show_create.out
new file mode 100644
index 0000000000..78b706ca7d
--- /dev/null
+++ b/regression-test/data/query_p0/show/test_map_show_create.out
@@ -0,0 +1,4 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !select --
+test_map_show_create   CREATE TABLE `test_map_show_create` (\n  `k1` int(11) 
NULL,\n  `k2` MAP<smallint(6),text> NULL,\n  `k3` MAP<int(11),text> NULL,\n  
`k4` MAP<date,int(11)> NULL,\n  `k5` MAP<datetime,text> NULL,\n  `k6` 
MAP<float,int(11)> NULL\n) ENGINE=OLAP\nDUPLICATE KEY(`k1`)\nCOMMENT 
'OLAP'\nDISTRIBUTED BY HASH(`k1`) BUCKETS 1\nPROPERTIES 
(\n"replication_allocation" = "tag.location.default: 1",\n"in_memory" = 
"false",\n"storage_format" = "V2",\n"light_schema_change" = "true",\n"dis [...]
+
diff --git a/regression-test/suites/export/test_map_export.groovy 
b/regression-test/suites/export/test_map_export.groovy
new file mode 100644
index 0000000000..9084a0a85d
--- /dev/null
+++ b/regression-test/suites/export/test_map_export.groovy
@@ -0,0 +1,131 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import org.codehaus.groovy.runtime.IOGroovyMethods
+
+import java.nio.charset.StandardCharsets
+import java.nio.file.Files
+import java.nio.file.Paths
+
+suite("test_map_export", "export") {
+    // check whether the FE config 'enable_outfile_to_local' is true
+    StringBuilder strBuilder = new StringBuilder()
+    strBuilder.append("curl --location-trusted -u " + context.config.jdbcUser 
+ ":" + context.config.jdbcPassword)
+    strBuilder.append(" http://" + context.config.feHttpAddress + 
"/rest/v1/config/fe")
+
+    String command = strBuilder.toString()
+    def process = command.toString().execute()
+    def code = process.waitFor()
+    def err = IOGroovyMethods.getText(new BufferedReader(new 
InputStreamReader(process.getErrorStream())));
+    def out = process.getText()
+    logger.info("Request FE Config: code=" + code + ", out=" + out + ", err=" 
+ err)
+    assertEquals(code, 0)
+    def response = parseJson(out.trim())
+    assertEquals(response.code, 0)
+    assertEquals(response.msg, "success")
+    def configJson = response.data.rows
+    boolean enableOutfileToLocal = false
+    for (Object conf: configJson) {
+        assert conf instanceof Map
+        if (((Map<String, String>) conf).get("Name").toLowerCase() == 
"enable_outfile_to_local") {
+            enableOutfileToLocal = ((Map<String, String>) 
conf).get("Value").toLowerCase() == "true"
+        }
+    }
+    if (!enableOutfileToLocal) {
+        logger.warn("Please set enable_outfile_to_local to true to run 
test_outfile")
+        return
+    }
+
+    // define the table
+    def testTable = "tbl_test_map"
+
+    sql "DROP TABLE IF EXISTS ${testTable}"
+    sql "ADMIN SET FRONTEND CONFIG ('enable_map_type' = 'true')"
+
+    sql """
+        CREATE TABLE IF NOT EXISTS ${testTable} (
+            id INT,
+            m Map<STRING, INT>
+        )
+        DUPLICATE KEY(id)
+        DISTRIBUTED BY HASH(id) BUCKETS 10
+        PROPERTIES("replication_num" = "1");
+        """
+
+    // make data
+    sql """ INSERT INTO ${testTable} VALUES (1, NULL); """
+    sql """ INSERT INTO ${testTable} VALUES (2, {}); """
+    sql """ INSERT INTO ${testTable} VALUES (3, {"  33,amory  ":2, " bet ":20, 
" cler ": 26}); """
+    sql """ INSERT INTO ${testTable} VALUES (4, {"k3":23, null: 20, "k4": 
null}); """
+    sql """ INSERT INTO ${testTable} VALUES (5, {null:null}); """
+
+    // check result
+    qt_select """ SELECT * FROM ${testTable} ORDER BY id; """
+
+    def outFilePath = """${context.file.parent}/tmp"""
+    logger.info("test_map_export the outFilePath=" + outFilePath)
+    // map select into outfile
+    try {
+        File path = new File(outFilePath)
+        if (!path.exists()) {
+            assert path.mkdirs()
+        } else {
+            throw new IllegalStateException("""${outFilePath} already exists! 
""")
+        }
+        sql """
+                    SELECT * FROM ${testTable} ORDER BY id INTO OUTFILE 
"file://${outFilePath}/";
+        """
+        File[] files = path.listFiles()
+        assert files.length == 1
+
+        List<String> outLines = 
Files.readAllLines(Paths.get(files[0].getAbsolutePath()), 
StandardCharsets.UTF_8)
+        assert outLines.size() == 5
+        for (int r = 0; r < outLines.size(); r++) {
+            String[] outLine = outLines.get(r).split("\t")
+            assert outLine.size() == 2
+            // check NULL
+            if (outLine[0] == 1) {
+                assert outLine[1] == "\\N"
+            }
+            // check empty
+            if (outLine[0] == 2) {
+                assert outLine[1] == "{}"
+            }
+            // check key contains ','
+            if (outLine[0] == 3) {
+                assert outLine[1] == "{\"  33,amory  \":2, \" bet \":20, \" 
cler \": 26}"
+            }
+            // check key val NULL
+            if (outLine[0] == 4) {
+                assert outLine[1] == "{\"k3\":23, null: 20, \"k4\": null}"
+            }
+            // check key val empty
+            if (outLine[0] == 5) {
+                assert outLine[1] == "{null:null}"
+            }
+        }
+    } finally {
+        try_sql("DROP TABLE IF EXISTS ${testTable}")
+        File path = new File(outFilePath)
+        if (path.exists()) {
+            for (File f : path.listFiles()) {
+                f.delete();
+            }
+            path.delete();
+        }
+    }
+}
diff --git a/regression-test/suites/load/insert/test_map_dml.groovy 
b/regression-test/suites/load/insert/test_map_dml.groovy
new file mode 100644
index 0000000000..438bd0b496
--- /dev/null
+++ b/regression-test/suites/load/insert/test_map_dml.groovy
@@ -0,0 +1,114 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_map_dml", "load") {
+    // define a sql table
+    def testTable = "tbl_test_map_string_int"
+    def testTable01 = "tbl_test_map_normal"
+
+    sql "ADMIN SET FRONTEND CONFIG ('enable_map_type' = 'true')"
+
+    def create_test_table = {testTablex ->
+        def result1 = sql """
+            CREATE TABLE IF NOT EXISTS ${testTable} (
+              `k1` INT(11) NULL COMMENT "",
+              `k2` Map<STRING, INT> NULL COMMENT ""
+            ) ENGINE=OLAP
+            DUPLICATE KEY(`k1`)
+            DISTRIBUTED BY HASH(`k1`) BUCKETS 1
+            PROPERTIES (
+            "replication_allocation" = "tag.location.default: 1",
+            "storage_format" = "V2"
+            )
+            """
+        
+        // DDL/DML returns 1 row with 1 column; its only value is the updated row count
+        assertTrue(result1.size() == 1)
+        assertTrue(result1[0].size() == 1)
+        assertTrue(result1[0][0] == 0, "Create table should update 0 rows")
+        
+        // insert 1 row to check whether the table is ok
+        def result2 = sql "INSERT INTO ${testTable} VALUES (6, {'amory': 6, 
'is': 38, 'cl': 0})"
+        assertTrue(result2.size() == 1)
+        assertTrue(result2[0].size() == 1)
+        assertTrue(result2[0][0] == 1, "Insert should update 1 rows")
+    }
+
+    def create_test_table01 = {testTablez ->
+        def result1 = sql """
+            CREATE TABLE IF NOT EXISTS ${testTable01} (
+              `k1` int(11) NULL,
+              `k2` Map<smallint(6), string> NULL,
+              `k3` Map<int(11), string> NULL,
+              `k4` array<bigint(20)> NULL,
+              `k5` Map<date, int> NULL,
+              `k6` Map<datetime, string> NULL,
+              `k7` Map<float, int> NULL
+            ) ENGINE=OLAP
+            DUPLICATE KEY(`k1`)
+            COMMENT 'OLAP'
+            DISTRIBUTED BY HASH(`k1`) BUCKETS 1
+            PROPERTIES (
+            "replication_allocation" = "tag.location.default: 1",
+            "in_memory" = "false",
+            "storage_format" = "V2",
+            "disable_auto_compaction" = "false"
+            )
+            """
+        
+        // DDL/DML returns 1 row with 1 column; its only value is the updated row count
+        assertTrue(result1.size() == 1)
+        assertTrue(result1[0].size() == 1)
+        assertTrue(result1[0][0] == 0, "Create table should update 0 rows")
+        
+
+        def result2 = sql """ INSERT INTO ${testTable01} VALUES (100, {1: '1', 
2: '2', 3:'3'}, {32767: '32767', 32768: '32768', 32769: '32769'},
+                        [65534, 65535, 65536], {'2022-07-13': 1}, {'2022-07-13 
12:30:00': '2022-07-13 12:30:00'},
+                        {0.33: 33, 0.67: 67})
+                        """
+
+        assertTrue(result2.size() == 1)
+        assertTrue(result2[0].size() == 1)
+        assertTrue(result2[0][0] == 1, "Insert should update 1 rows")
+    }
+
+
+    // case1: string_int for map
+    try {
+        def res = sql "DROP TABLE IF EXISTS ${testTable}"
+        create_test_table.call(testTable)
+        sql "INSERT INTO ${testTable} VALUES (1, {' amory ': 6, 'happy': 38})"
+
+        // select the table and check whether the data is correct
+        qt_select "SELECT * FROM ${testTable} ORDER BY k1"
+
+    } finally {
+        try_sql("DROP TABLE IF EXISTS ${testTable}")
+    }
+
+    // case2: normal key val type for map
+    try {
+        def res = sql "DROP TABLE IF EXISTS ${testTable01}"
+
+        create_test_table01.call(testTable)
+        // select the table and check whether the data is correct
+        qt_select "SELECT * FROM ${testTable01} ORDER BY k1"
+
+    } finally {
+        try_sql("DROP TABLE IF EXISTS ${testTable01}")
+    }
+}
diff --git 
a/regression-test/suites/load_p0/stream_load/test_map_load_and_function.groovy 
b/regression-test/suites/load_p0/stream_load/test_map_load_and_function.groovy
new file mode 100644
index 0000000000..d796c08b2e
--- /dev/null
+++ 
b/regression-test/suites/load_p0/stream_load/test_map_load_and_function.groovy
@@ -0,0 +1,74 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import org.codehaus.groovy.runtime.IOGroovyMethods
+
+import java.nio.charset.StandardCharsets
+import java.nio.file.Files
+import java.nio.file.Paths
+
+suite("test_map_load_and_function", "p0") {
+    // define a sql table
+    def testTable = "tbl_test_map"
+    def dataFile = "test_map.csv"
+
+    sql "DROP TABLE IF EXISTS ${testTable}"
+    sql "ADMIN SET FRONTEND CONFIG ('enable_map_type' = 'true')"
+
+    sql """
+        CREATE TABLE IF NOT EXISTS ${testTable} (
+            id INT,
+            m Map<STRING, INT>
+        )
+        DUPLICATE KEY(id)
+        DISTRIBUTED BY HASH(id) BUCKETS 10
+        PROPERTIES("replication_num" = "1");
+        """
+
+    // load the map data from csv file
+    streamLoad {
+        table testTable
+
+        file dataFile // import csv file
+        time 10000 // limit inflight 10s
+
+        // If a check callback is declared, the default check conditions are ignored,
+        // so you must check all conditions yourself.
+        check { result, exception, startTime, endTime ->
+            if (exception != null) {
+                throw exception
+            }
+            log.info("Stream load result: ${result}".toString())
+            def json = parseJson(result)
+            assertEquals("success", json.Status.toLowerCase())
+            assertEquals("OK", json.Message)
+            assertEquals(15, json.NumberTotalRows)
+            assertTrue(json.LoadBytes > 0)
+
+        }
+    }
+
+    // check result
+    qt_select "SELECT * FROM ${testTable} ORDER BY id"
+
+    // insert valid rows via SQL (a NULL map and a map literal)
+    sql """INSERT INTO ${testTable} VALUES(12, NULL)"""
+    sql """INSERT INTO ${testTable} VALUES(13, {"k1":100, "k2": 130})"""
+
+    // map element_at
+    qt_select "SELECT m['k2'] FROM ${testTable} ORDER BY id"
+}
diff --git a/regression-test/suites/query_p0/show/test_map_show_create.groovy 
b/regression-test/suites/query_p0/show/test_map_show_create.groovy
new file mode 100644
index 0000000000..463cb2babc
--- /dev/null
+++ b/regression-test/suites/query_p0/show/test_map_show_create.groovy
@@ -0,0 +1,69 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_map_show_create", "query") {
+    // define a sql table
+    def testTable = "test_map_show_create"
+
+    def create_test_table = {testTablex ->
+        def result1 = sql """
+            CREATE TABLE IF NOT EXISTS ${testTable} (
+              `k1` INT(11) NULL,
+              `k2` MAP<SMALLINT(6), STRING> NULL,
+              `k3` MAP<INT(11), STRING> NULL,
+              `k4` MAP<DATE, INT> NULL,
+              `k5` MAP<DATETIME, STRING> NULL,
+              `k6` Map<FLOAT, INT> NULL
+            ) ENGINE=OLAP
+            DUPLICATE KEY(`k1`)
+            COMMENT 'OLAP'
+            DISTRIBUTED BY HASH(`k1`) BUCKETS 1
+            PROPERTIES (
+            "replication_allocation" = "tag.location.default: 1",
+            "in_memory" = "false",
+            "storage_format" = "V2",
+            "disable_auto_compaction" = "false"
+            )
+            """
+
+        // DDL/DML returns 1 row with 1 column; the only value is the updated row count
+        assertTrue(result1.size() == 1)
+        assertTrue(result1[0].size() == 1)
+        assertTrue(result1[0][0] == 0, "Create table should update 0 rows")
+
+        // insert 1 row to check whether the table is ok
+        def result2 = sql """ INSERT INTO ${testTable} VALUES (100, {1: '1', 
2: '2', 3:'3'}, 
+                        {32767: '32767', 32768: '32768', 32769: '32769'}, 
{'2022-07-13': 1}, 
+                        {'2022-07-13 12:30:00': '2022-07-13 12:30:00'}, {0.33: 
33, 0.67: 67})
+                        """
+        assertTrue(result2.size() == 1)
+        assertTrue(result2[0].size() == 1)
+        assertTrue(result2[0][0] == 1, "Insert should update 1 rows")
+    }
+
+    try {
+        sql "DROP TABLE IF EXISTS ${testTable}"
+        sql "ADMIN SET FRONTEND CONFIG ('enable_map_type' = 'true')"
+
+        create_test_table.call(testTable)
+
+        qt_select "SHOW CREATE TABLE ${testTable}"
+    } finally {
+        try_sql("DROP TABLE IF EXISTS ${testTable}")
+    }
+
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

[doris] branch master updated: [Feature](map-type) Support stream load and fix some bugs for map type (#16776)

Reply via email to