This is an automated email from the ASF dual-hosted git repository. xuyang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 8b70bfdc31 [Feature](map-type) Support stream load and fix some bugs for map type (#16776) 8b70bfdc31 is described below commit 8b70bfdc31141332cbb72008083a423abcb7e741 Author: amory <amorywang...@gmail.com> AuthorDate: Sun Feb 19 15:11:54 2023 +0800 [Feature](map-type) Support stream load and fix some bugs for map type (#16776) 1、support stream load with json, csv format for map 2、fix olap convertor when compaction action in map column which has null 3、support select outToFile for map 4、add some regression-test --- be/src/olap/rowset/segment_v2/column_writer.cpp | 13 +- be/src/olap/rowset/segment_v2/column_writer.h | 2 +- be/src/vec/data_types/data_type_factory.cpp | 5 +- be/src/vec/data_types/data_type_factory.hpp | 3 +- be/src/vec/data_types/data_type_map.cpp | 171 +++++++++++++++------ be/src/vec/exprs/vexpr.cpp | 9 ++ be/src/vec/functions/function_cast.h | 12 ++ be/src/vec/olap/olap_data_convertor.h | 1 - be/src/vec/runtime/vfile_result_writer.cpp | 4 + be/src/vec/sink/vmysql_result_writer.cpp | 6 + regression-test/data/export/test_map_export.out | 8 + regression-test/data/load/insert/test_map_dml.out | 8 + .../data/load_p0/stream_load/test_map.csv | 15 ++ .../stream_load/test_map_load_and_function.out | 37 +++++ regression-test/data/map_p0/test_map_dml.out | 11 ++ .../data/query_p0/show/test_map_show_create.out | 4 + .../suites/export/test_map_export.groovy | 131 ++++++++++++++++ .../suites/load/insert/test_map_dml.groovy | 114 ++++++++++++++ .../stream_load/test_map_load_and_function.groovy | 74 +++++++++ .../query_p0/show/test_map_show_create.groovy | 69 +++++++++ 20 files changed, 643 insertions(+), 54 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/column_writer.cpp b/be/src/olap/rowset/segment_v2/column_writer.cpp index 583becb2b0..a35c840eb5 100644 --- a/be/src/olap/rowset/segment_v2/column_writer.cpp +++ b/be/src/olap/rowset/segment_v2/column_writer.cpp @@ -1000,7 +1000,15 @@ Status MapColumnWriter::finish() { return Status::OK(); } -// todo. make keys and values write +Status MapColumnWriter::append_nullable(const uint8_t* null_map, const uint8_t** ptr, + size_t num_rows) { + if (is_nullable()) { + RETURN_IF_ERROR(_null_writer->append_data(&null_map, num_rows)); + } + RETURN_IF_ERROR(append_data(ptr, num_rows)); + return Status::OK(); +} + Status MapColumnWriter::append_data(const uint8_t** ptr, size_t num_rows) { auto kv_ptr = reinterpret_cast<const uint64_t*>(*ptr); for (size_t i = 0; i < 2; ++i) { @@ -1008,9 +1016,6 @@ Status MapColumnWriter::append_data(const uint8_t** ptr, size_t num_rows) { const uint8_t* val_ptr = (const uint8_t*)data; RETURN_IF_ERROR(_kv_writers[i]->append_data(&val_ptr, num_rows)); } - if (is_nullable()) { - return write_null_column(num_rows, false); - } return Status::OK(); } diff --git a/be/src/olap/rowset/segment_v2/column_writer.h b/be/src/olap/rowset/segment_v2/column_writer.h index 7d140324dd..036c4d3baf 100644 --- a/be/src/olap/rowset/segment_v2/column_writer.h +++ b/be/src/olap/rowset/segment_v2/column_writer.h @@ -382,7 +382,7 @@ public: Status init() override; Status append_data(const uint8_t** ptr, size_t num_rows) override; - + Status append_nullable(const uint8_t* null_map, const uint8_t** ptr, size_t num_rows) override; uint64_t estimate_buffer_size() override; Status finish() override; diff --git a/be/src/vec/data_types/data_type_factory.cpp b/be/src/vec/data_types/data_type_factory.cpp index 65510dd9f2..91fc51187b 100644 --- a/be/src/vec/data_types/data_type_factory.cpp +++ b/be/src/vec/data_types/data_type_factory.cpp @@ -169,9 +169,10 @@ DataTypePtr DataTypeFactory::create_data_type(const TypeDescriptor& col_desc, bo break; case TYPE_MAP: DCHECK(col_desc.children.size() == 2); + // todo. (Amory) Support Map contains_nulls in FE MapType.java Later PR nested = std::make_shared<vectorized::DataTypeMap>( - create_data_type(col_desc.children[0], col_desc.contains_nulls[0]), - create_data_type(col_desc.children[1], col_desc.contains_nulls[1])); + create_data_type(col_desc.children[0], true), + create_data_type(col_desc.children[1], true)); break; case TYPE_STRUCT: { DCHECK(col_desc.children.size() >= 1); diff --git a/be/src/vec/data_types/data_type_factory.hpp b/be/src/vec/data_types/data_type_factory.hpp index 879418a326..b2623b06da 100644 --- a/be/src/vec/data_types/data_type_factory.hpp +++ b/be/src/vec/data_types/data_type_factory.hpp @@ -113,7 +113,8 @@ public: return entity.second; } } - if (type_ptr->get_type_id() == TypeIndex::Struct) { + if (type_ptr->get_type_id() == TypeIndex::Struct || + type_ptr->get_type_id() == TypeIndex::Map) { DataTypeFactory::instance().register_data_type(type_ptr->get_name(), type_ptr); for (const auto& entity : _invert_data_type_map) { if (entity.first->equals(*type_ptr)) { diff --git a/be/src/vec/data_types/data_type_map.cpp b/be/src/vec/data_types/data_type_map.cpp index c40e0362c5..daf683c3ab 100644 --- a/be/src/vec/data_types/data_type_map.cpp +++ b/be/src/vec/data_types/data_type_map.cpp @@ -20,14 +20,16 @@ #include "gen_cpp/data.pb.h" #include "vec/columns/column_array.h" #include "vec/columns/column_map.h" +#include "vec/columns/column_nullable.h" #include "vec/common/assert_cast.h" -#include "vec/data_types/data_type_factory.hpp" +#include "vec/data_types/data_type_array.h" +#include "vec/data_types/data_type_nullable.h" namespace doris::vectorized { DataTypeMap::DataTypeMap(const DataTypePtr& keys_, const DataTypePtr& values_) { - key_type = keys_; - value_type = values_; + key_type = make_nullable(keys_); + value_type = make_nullable(values_); keys = std::make_shared<DataTypeArray>(key_type); values = std::make_shared<DataTypeArray>(value_type); @@ -53,7 +55,7 @@ std::string DataTypeMap::to_string(const IColumn& column, size_t row_num) const ss << ", "; } if (nested_keys_column.is_null_at(i)) { - ss << "NULL"; + ss << "null"; } else if (WhichDataType(remove_nullable(key_type)).is_string_or_fixed_string()) { ss << "'" << key_type->to_string(nested_keys_column, i) << "'"; } else { @@ -61,7 +63,7 @@ std::string DataTypeMap::to_string(const IColumn& column, size_t row_num) const } ss << ":"; if (nested_values_column.is_null_at(i)) { - ss << "NULL"; + ss << "null"; } else if (WhichDataType(remove_nullable(value_type)).is_string_or_fixed_string()) { ss << "'" << value_type->to_string(nested_values_column, i) << "'"; } else { @@ -78,6 +80,85 @@ void DataTypeMap::to_string(const class doris::vectorized::IColumn& column, size ostr.write(ss.c_str(), strlen(ss.c_str())); } +bool next_slot_from_string(ReadBuffer& rb, StringRef& output, bool& has_quota) { + StringRef element(rb.position(), 0); + has_quota = false; + if (rb.eof()) { + return false; + } + + // ltrim + while (!rb.eof() && isspace(*rb.position())) { + ++rb.position(); + element.data = rb.position(); + } + + // parse string + if (*rb.position() == '"' || *rb.position() == '\'') { + const char str_sep = *rb.position(); + size_t str_len = 1; + // search until next '"' or '\'' + while (str_len < rb.count() && *(rb.position() + str_len) != str_sep) { + ++str_len; + } + // invalid string + if (str_len >= rb.count()) { + rb.position() = rb.end(); + return false; + } + has_quota = true; + rb.position() += str_len + 1; + element.size += str_len + 1; + } + + // parse array element until map separator ':' or ',' or end '}' + while (!rb.eof() && (*rb.position() != ':') && (*rb.position() != ',') && + (rb.count() != 1 || *rb.position() != '}')) { + if (has_quota && !isspace(*rb.position())) { + return false; + } + ++rb.position(); + ++element.size; + } + // invalid array element + if (rb.eof()) { + return false; + } + // adjust read buffer position to first char of next array element + ++rb.position(); + + // rtrim + while (element.size > 0 && isspace(element.data[element.size - 1])) { + --element.size; + } + + // trim '"' and '\'' for string + if (element.size >= 2 && (element.data[0] == '"' || element.data[0] == '\'') && + element.data[0] == element.data[element.size - 1]) { + ++element.data; + element.size -= 2; + } + output = element; + return true; +} + +bool is_empty_null_element(StringRef element, IColumn* nested_column, bool has_quota) { + auto& nested_null_col = reinterpret_cast<ColumnNullable&>(*nested_column); + // handle empty element + if (element.size == 0) { + nested_null_col.get_nested_column().insert_default(); + nested_null_col.get_null_map_data().push_back(0); + return true; + } + + // handle null element + if (!has_quota && element.size == 4 && strncmp(element.data, "null", 4) == 0) { + nested_null_col.get_nested_column().insert_default(); + nested_null_col.get_null_map_data().push_back(1); + return true; + } + return false; +} Status DataTypeMap::from_string(ReadBuffer& rb, IColumn* column) const { DCHECK(!rb.eof()); auto* map_column = assert_cast<ColumnMap*>(column); @@ -91,57 +172,57 @@ Status DataTypeMap::from_string(ReadBuffer& rb, IColumn* column) const { *(rb.end() - 1)); } - std::stringstream keyCharset; - std::stringstream valCharset; - if (rb.count() == 2) { // empty map {} , need to make empty array to add offset - keyCharset << "[]"; - valCharset << "[]"; + map_column->insert_default(); } else { - // {"aaa": 1, "bbb": 20}, need to handle key and value to make key column arr and value arr + // {"aaa": 1, "bbb": 20}, need to handle key slot and value slot to make key column arr and value arr // skip "{" ++rb.position(); - keyCharset << "["; - valCharset << "["; + auto& keys_arr = reinterpret_cast<ColumnArray&>(map_column->get_keys()); + ColumnArray::Offsets64& key_off = keys_arr.get_offsets(); + auto& values_arr = reinterpret_cast<ColumnArray&>(map_column->get_values()); + ColumnArray::Offsets64& val_off = values_arr.get_offsets(); + + IColumn& nested_key_column = keys_arr.get_data(); + DCHECK(nested_key_column.is_nullable()); + IColumn& nested_val_column = values_arr.get_data(); + DCHECK(nested_val_column.is_nullable()); + + size_t element_num = 0; while (!rb.eof()) { - size_t kv_len = 0; - auto start = rb.position(); - while (!rb.eof() && *start != ',' && *start != '}') { - kv_len++; - start++; + StringRef key_element(rb.position(), rb.count()); + bool has_quota = false; + if (!next_slot_from_string(rb, key_element, has_quota)) { + return Status::InvalidArgument("Cannot read map key from text '{}'", + key_element.to_string()); } - if (kv_len >= rb.count()) { - return Status::InvalidArgument("Invalid Length"); + if (!is_empty_null_element(key_element, &nested_key_column, has_quota)) { + ReadBuffer krb(const_cast<char*>(key_element.data), key_element.size); + if (auto st = key_type->from_string(krb, &nested_key_column); !st.ok()) { + map_column->pop_back(element_num); + return st; + } } - size_t k_len = 0; - auto k_rb = rb.position(); - while (kv_len > 0 && *k_rb != ':') { - k_len++; - k_rb++; + has_quota = false; + StringRef value_element(rb.position(), rb.count()); + if (!next_slot_from_string(rb, value_element, has_quota)) { + return Status::InvalidArgument("Cannot read map value from text '{}'", + value_element.to_string()); } - ReadBuffer key_rb(rb.position(), k_len); - ReadBuffer val_rb(k_rb + 1, kv_len - k_len - 1); - - // handle key - keyCharset << key_rb.to_string(); - keyCharset << ","; - - // handle value - valCharset << val_rb.to_string(); - valCharset << ","; - - rb.position() += kv_len + 1; + if (!is_empty_null_element(value_element, &nested_val_column, has_quota)) { + ReadBuffer vrb(const_cast<char*>(value_element.data), value_element.size); + if (auto st = value_type->from_string(vrb, &nested_val_column); !st.ok()) { + map_column->pop_back(element_num); + return st; + } + } + ++element_num; } - keyCharset << ']'; - valCharset << ']'; + key_off.push_back(key_off.back() + element_num); + val_off.push_back(val_off.back() + element_num); } - - ReadBuffer kb(keyCharset.str().data(), keyCharset.str().length()); - ReadBuffer vb(valCharset.str().data(), valCharset.str().length()); - keys->from_string(kb, &map_column->get_keys()); - values->from_string(vb, &map_column->get_values()); return Status::OK(); } @@ -199,4 +280,4 @@ const char* DataTypeMap::deserialize(const char* buf, IColumn* column, int data_ data_version); } -} // namespace doris::vectorized +} // namespace doris::vectorized \ No newline at end of file diff --git a/be/src/vec/exprs/vexpr.cpp b/be/src/vec/exprs/vexpr.cpp index 69ccf0f5bd..f90993884d 100644 --- a/be/src/vec/exprs/vexpr.cpp +++ b/be/src/vec/exprs/vexpr.cpp @@ -372,6 +372,15 @@ FunctionContext::TypeDesc VExpr::column_type_to_type_desc(const TypeDescriptor& out.children.push_back(VExpr::column_type_to_type_desc(t)); } break; + case TYPE_MAP: + CHECK(type.children.size() == 2); + // only support map key is scalar + CHECK(!type.children[0].is_complex_type()); + out.type = FunctionContext::TYPE_MAP; + for (const auto& t : type.children) { + out.children.push_back(VExpr::column_type_to_type_desc(t)); + } + break; case TYPE_STRING: out.type = FunctionContext::TYPE_STRING; out.len = type.len; diff --git a/be/src/vec/functions/function_cast.h b/be/src/vec/functions/function_cast.h index c8dc4282df..7ac3316fdb 100644 --- a/be/src/vec/functions/function_cast.h +++ b/be/src/vec/functions/function_cast.h @@ -1541,6 +1541,16 @@ private: return &ConvertImplGenericToJsonb::execute; } } + + WrapperType create_map_wrapper(const DataTypePtr& from_type, const DataTypeMap& to_type) const { + switch (from_type->get_type_id()) { + case TypeIndex::String: + return &ConvertImplGenericFromString<ColumnString>::execute; + default: + return create_unsupport_wrapper(from_type->get_name(), to_type.get_name()); + } + } + // check struct value type and get to_type value // TODO: need handle another type to cast struct WrapperType create_struct_wrapper(const DataTypePtr& from_type, @@ -1727,6 +1737,8 @@ private: static_cast<const DataTypeArray&>(*to_type)); case TypeIndex::Struct: return create_struct_wrapper(from_type, static_cast<const DataTypeStruct&>(*to_type)); + case TypeIndex::Map: + return create_map_wrapper(from_type, static_cast<const DataTypeMap&>(*to_type)); default: break; } diff --git a/be/src/vec/olap/olap_data_convertor.h b/be/src/vec/olap/olap_data_convertor.h index 0efd5c1bd8..b23ba5ee5f 100644 --- a/be/src/vec/olap/olap_data_convertor.h +++ b/be/src/vec/olap/olap_data_convertor.h @@ -413,7 +413,6 @@ private: Status convert_to_olap() override; const void* get_data() const override { return _results.data(); }; - const void* get_data_at(size_t offset) const override { LOG(FATAL) << "now not support get_data_at for OlapColumnDataConvertorMap"; }; diff --git a/be/src/vec/runtime/vfile_result_writer.cpp b/be/src/vec/runtime/vfile_result_writer.cpp index 2782008b34..16d48bde08 100644 --- a/be/src/vec/runtime/vfile_result_writer.cpp +++ b/be/src/vec/runtime/vfile_result_writer.cpp @@ -342,6 +342,10 @@ Status VFileResultWriter::_write_csv_file(const Block& block) { _plain_text_outstream << col.type->to_string(*col.column, i); break; } + case TYPE_MAP: { + _plain_text_outstream << col.type->to_string(*col.column, i); + break; + } default: { // not supported type, like BITMAP, just export null _plain_text_outstream << NULL_IN_CSV; diff --git a/be/src/vec/sink/vmysql_result_writer.cpp b/be/src/vec/sink/vmysql_result_writer.cpp index f74c3f94a4..bade808912 100644 --- a/be/src/vec/sink/vmysql_result_writer.cpp +++ b/be/src/vec/sink/vmysql_result_writer.cpp @@ -195,6 +195,12 @@ Status VMysqlResultWriter<is_binary_format>::_add_one_column( return Status::InternalError("pack mysql buffer failed."); } + if constexpr (is_nullable) { + if (column_ptr->is_null_at(i)) { + buf_ret = rows_buffer[i].push_null(); + continue; + } + } rows_buffer[i].open_dynamic_mode(); std::string cell_str = map_type.to_string(*column, i); buf_ret = rows_buffer[i].push_string(cell_str.c_str(), strlen(cell_str.c_str())); diff --git a/regression-test/data/export/test_map_export.out b/regression-test/data/export/test_map_export.out new file mode 100644 index 0000000000..0e0cfc3f7c --- /dev/null +++ b/regression-test/data/export/test_map_export.out @@ -0,0 +1,8 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !select -- +1 \N +2 {} +3 {' 33,amory ':2, ' bet ':20, ' cler ':26} +4 {'k3':23, null:20, 'k4':null} +5 {null:null} + diff --git a/regression-test/data/load/insert/test_map_dml.out b/regression-test/data/load/insert/test_map_dml.out new file mode 100644 index 0000000000..446b0a9c59 --- /dev/null +++ b/regression-test/data/load/insert/test_map_dml.out @@ -0,0 +1,8 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !select -- +1 {' amory ':6, 'happy':38} +6 {'amory':6, 'is':38, 'cl':0} + +-- !select -- +100 {1:'1', 2:'2', 3:'3'} {32767:'32767', 32768:'32768', 32769:'32769'} [65534, 65535, 65536] {2022-07-13:1} {2022-07-13 12:30:00:'2022-07-13 12:30:00'} {0.33:33, 0.67:67} + diff --git a/regression-test/data/load_p0/stream_load/test_map.csv b/regression-test/data/load_p0/stream_load/test_map.csv new file mode 100644 index 0000000000..cbace425cc --- /dev/null +++ b/regression-test/data/load_p0/stream_load/test_map.csv @@ -0,0 +1,15 @@ +1 \N +2 {" 11amory ":23, "beat":20, " clever ": 66} +3 {"k1": 31, "k2": 300} +4 {} +5 \N +6 {"k1":41, "k2": 400} +7 {" 33,amory ":2, " bet ":20, " cler ": 26} +8 {} +9 {' 1,amy ':2, " k2 ":90, " k7 ": 33} +10 {} +11 {"k1': 4, "k2": 400} +12 {"k3":23, null: 20, "k4": null} +13 {"null":1} +15 {:2, "k2":} +16 {null:null} diff --git a/regression-test/data/load_p0/stream_load/test_map_load_and_function.out b/regression-test/data/load_p0/stream_load/test_map_load_and_function.out new file mode 100644 index 0000000000..1b7eb1f5a3 --- /dev/null +++ b/regression-test/data/load_p0/stream_load/test_map_load_and_function.out @@ -0,0 +1,37 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !select -- +1 \N +2 {' 11amory ':23, 'beat':20, ' clever ':66} +3 {'k1':31, 'k2':300} +4 {} +5 \N +6 {'k1':41, 'k2':400} +7 {' 33,amory ':2, ' bet ':20, ' cler ':26} +8 {} +9 {' 1,amy ':2, ' k2 ':90, ' k7 ':33} +10 {} +11 \N +12 {'k3':23, null:20, 'k4':null} +13 {'null':1} +15 {'':2, 'k2':0} +16 {null:null} + +-- !select -- +\N +\N +300 +\N +\N +400 +\N +\N +\N +\N +\N +\N +\N +\N +130 +0 +\N + diff --git a/regression-test/data/map_p0/test_map_dml.out b/regression-test/data/map_p0/test_map_dml.out new file mode 100644 index 0000000000..531ce9ac2e --- /dev/null +++ b/regression-test/data/map_p0/test_map_dml.out @@ -0,0 +1,11 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !select -- +1 {' amory ':6, 'happy':38} +6 {'amory':6, 'is':38, 'cl':0} + +-- !test -- +100 {1:'1', 2:'2', 3:'3'} {32767:'32767', 32768:'32768', 32769:'32769'} [65534, 65535, 65536] {2022-07-13:1} {2022-07-13 12:30:00:'2022-07-13 12:30:00'} {0.33:33, 0.67:67} + +-- !select -- +100 {1:'1', 2:'2', 3:'3'} {32767:'32767', 32768:'32768', 32769:'32769'} [65534, 65535, 65536] {2022-07-13:1} {2022-07-13 12:30:00:'2022-07-13 12:30:00'} {0.33:33, 0.67:67} + diff --git a/regression-test/data/query_p0/show/test_map_show_create.out b/regression-test/data/query_p0/show/test_map_show_create.out new file mode 100644 index 0000000000..78b706ca7d --- /dev/null +++ b/regression-test/data/query_p0/show/test_map_show_create.out @@ -0,0 +1,4 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !select -- +test_map_show_create CREATE TABLE `test_map_show_create` (\n `k1` int(11) NULL,\n `k2` MAP<smallint(6),text> NULL,\n `k3` MAP<int(11),text> NULL,\n `k4` MAP<date,int(11)> NULL,\n `k5` MAP<datetime,text> NULL,\n `k6` MAP<float,int(11)> NULL\n) ENGINE=OLAP\nDUPLICATE KEY(`k1`)\nCOMMENT 'OLAP'\nDISTRIBUTED BY HASH(`k1`) BUCKETS 1\nPROPERTIES (\n"replication_allocation" = "tag.location.default: 1",\n"in_memory" = "false",\n"storage_format" = "V2",\n"light_schema_change" = "true",\n"dis [...] + diff --git a/regression-test/suites/export/test_map_export.groovy b/regression-test/suites/export/test_map_export.groovy new file mode 100644 index 0000000000..9084a0a85d --- /dev/null +++ b/regression-test/suites/export/test_map_export.groovy @@ -0,0 +1,131 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import org.codehaus.groovy.runtime.IOGroovyMethods + +import java.nio.charset.StandardCharsets +import java.nio.file.Files +import java.nio.file.Paths + +suite("test_map_export", "export") { + // check whether the FE config 'enable_outfile_to_local' is true + StringBuilder strBuilder = new StringBuilder() + strBuilder.append("curl --location-trusted -u " + context.config.jdbcUser + ":" + context.config.jdbcPassword) + strBuilder.append(" http://" + context.config.feHttpAddress + "/rest/v1/config/fe") + + String command = strBuilder.toString() + def process = command.toString().execute() + def code = process.waitFor() + def err = IOGroovyMethods.getText(new BufferedReader(new InputStreamReader(process.getErrorStream()))); + def out = process.getText() + logger.info("Request FE Config: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + def response = parseJson(out.trim()) + assertEquals(response.code, 0) + assertEquals(response.msg, "success") + def configJson = response.data.rows + boolean enableOutfileToLocal = false + for (Object conf: configJson) { + assert conf instanceof Map + if (((Map<String, String>) conf).get("Name").toLowerCase() == "enable_outfile_to_local") { + enableOutfileToLocal = ((Map<String, String>) conf).get("Value").toLowerCase() == "true" + } + } + if (!enableOutfileToLocal) { + logger.warn("Please set enable_outfile_to_local to true to run test_outfile") + return + } + + // define the table + def testTable = "tbl_test_map" + + sql "DROP TABLE IF EXISTS ${testTable}" + sql "ADMIN SET FRONTEND CONFIG ('enable_map_type' = 'true')" + + sql """ + CREATE TABLE IF NOT EXISTS ${testTable} ( + id INT, + m Map<STRING, INT> + ) + DUPLICATE KEY(id) + DISTRIBUTED BY HASH(id) BUCKETS 10 + PROPERTIES("replication_num" = "1"); + """ + + // make data + sql """ INSERT INTO ${testTable} VALUES (1, NULL); """ + sql """ INSERT INTO ${testTable} VALUES (2, {}); """ + sql """ INSERT INTO ${testTable} VALUES (3, {" 33,amory ":2, " bet ":20, " cler ": 26}); """ + sql """ INSERT INTO ${testTable} VALUES (4, {"k3":23, null: 20, "k4": null}); """ + sql """ INSERT INTO ${testTable} VALUES (5, {null:null}); """ + + // check result + qt_select """ SELECT * FROM ${testTable} ORDER BY id; """ + + def outFilePath = """${context.file.parent}/tmp""" + logger.info("test_map_export the outFilePath=" + outFilePath) + // map select into outfile + try { + File path = new File(outFilePath) + if (!path.exists()) { + assert path.mkdirs() + } else { + throw new IllegalStateException("""${outFilePath} already exists! """) + } + sql """ + SELECT * FROM ${testTable} ORDER BY id INTO OUTFILE "file://${outFilePath}/"; + """ + File[] files = path.listFiles() + assert files.length == 1 + + List<String> outLines = Files.readAllLines(Paths.get(files[0].getAbsolutePath()), StandardCharsets.UTF_8) + assert outLines.size() == 5 + for (int r = 0; r < outLines.size(); r++) { + String[] outLine = outLines.get(r).split("\t") + assert outLine.size() == 2 + // check NULL + if (outLine[0] == 1) { + assert outLine[1] == "\\N" + } + // check empty + if (outLine[0] == 2) { + assert outLine[1] == "{}" + } + // check key contains ',' + if (outLine[0] == 3) { + assert outLine[1] == "{\" 33,amory \":2, \" bet \":20, \" cler \": 26}" + } + // check key val NULL + if (outLine[0] == 4) { + assert outLine[1] == "{\"k3\":23, null: 20, \"k4\": null}" + } + // check key val empty + if (outLine[0] == 5) { + assert outLine[1] == "{null:null}" + } + } + } finally { + try_sql("DROP TABLE IF EXISTS ${testTable}") + File path = new File(outFilePath) + if (path.exists()) { + for (File f : path.listFiles()) { + f.delete(); + } + path.delete(); + } + } +} diff --git a/regression-test/suites/load/insert/test_map_dml.groovy b/regression-test/suites/load/insert/test_map_dml.groovy new file mode 100644 index 0000000000..438bd0b496 --- /dev/null +++ b/regression-test/suites/load/insert/test_map_dml.groovy @@ -0,0 +1,114 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_map_dml", "load") { + // define a sql table + def testTable = "tbl_test_map_string_int" + def testTable01 = "tbl_test_map_normal" + + sql "ADMIN SET FRONTEND CONFIG ('enable_map_type' = 'true')" + + def create_test_table = {testTablex -> + def result1 = sql """ + CREATE TABLE IF NOT EXISTS ${testTable} ( + `k1` INT(11) NULL COMMENT "", + `k2` Map<STRING, INT> NULL COMMENT "" + ) ENGINE=OLAP + DUPLICATE KEY(`k1`) + DISTRIBUTED BY HASH(`k1`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "storage_format" = "V2" + ) + """ + + // DDL/DML return 1 row and 3 column, the only value is update row count + assertTrue(result1.size() == 1) + assertTrue(result1[0].size() == 1) + assertTrue(result1[0][0] == 0, "Create table should update 0 rows") + + // insert 1 row to check whether the table is ok + def result2 = sql "INSERT INTO ${testTable} VALUES (6, {'amory': 6, 'is': 38, 'cl': 0})" + assertTrue(result2.size() == 1) + assertTrue(result2[0].size() == 1) + assertTrue(result2[0][0] == 1, "Insert should update 1 rows") + } + + def create_test_table01 = {testTablez -> + def result1 = sql """ + CREATE TABLE IF NOT EXISTS ${testTable01} ( + `k1` int(11) NULL, + `k2` Map<smallint(6), string> NULL, + `k3` Map<int(11), string> NULL, + `k4` array<bigint(20)> NULL, + `k5` Map<date, int> NULL, + `k6` Map<datetime, string> NULL, + `k7` Map<float, int> NULL + ) ENGINE=OLAP + DUPLICATE KEY(`k1`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`k1`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "in_memory" = "false", + "storage_format" = "V2", + "disable_auto_compaction" = "false" + ) + """ + + // DDL/DML return 1 row and 3 column, the only value is update row count + assertTrue(result1.size() == 1) + assertTrue(result1[0].size() == 1) + assertTrue(result1[0][0] == 0, "Create table should update 0 rows") + + + def result2 = sql """ INSERT INTO ${testTable01} VALUES (100, {1: '1', 2: '2', 3:'3'}, {32767: '32767', 32768: '32768', 32769: '32769'}, + [65534, 65535, 65536], {'2022-07-13': 1}, {'2022-07-13 12:30:00': '2022-07-13 12:30:00'}, + {0.33: 33, 0.67: 67}) + """ + + assertTrue(result2.size() == 1) + assertTrue(result2[0].size() == 1) + assertTrue(result2[0][0] == 1, "Insert should update 1 rows") + } + + + // case1: string_int for map + try { + def res = sql "DROP TABLE IF EXISTS ${testTable}" + create_test_table.call(testTable) + sql "INSERT INTO ${testTable} VALUES (1, {' amory ': 6, 'happy': 38})" + + // select the table and check whether the data is correct + qt_select "SELECT * FROM ${testTable} ORDER BY k1" + + } finally { + try_sql("DROP TABLE IF EXISTS ${testTable}") + } + + // case2: normal key val type for map + try { + def res = sql "DROP TABLE IF EXISTS ${testTable01}" + + create_test_table01.call(testTable) + // select the table and check whether the data is correct + qt_select "SELECT * FROM ${testTable01} ORDER BY k1" + + } finally { + try_sql("DROP TABLE IF EXISTS ${testTable01}") + } +} diff --git a/regression-test/suites/load_p0/stream_load/test_map_load_and_function.groovy b/regression-test/suites/load_p0/stream_load/test_map_load_and_function.groovy new file mode 100644 index 0000000000..d796c08b2e --- /dev/null +++ b/regression-test/suites/load_p0/stream_load/test_map_load_and_function.groovy @@ -0,0 +1,74 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import org.codehaus.groovy.runtime.IOGroovyMethods + +import java.nio.charset.StandardCharsets +import java.nio.file.Files +import java.nio.file.Paths + +suite("test_map_load_and_function", "p0") { + // define a sql table + def testTable = "tbl_test_map" + def dataFile = "test_map.csv" + + sql "DROP TABLE IF EXISTS ${testTable}" + sql "ADMIN SET FRONTEND CONFIG ('enable_map_type' = 'true')" + + sql """ + CREATE TABLE IF NOT EXISTS ${testTable} ( + id INT, + m Map<STRING, INT> + ) + DUPLICATE KEY(id) + DISTRIBUTED BY HASH(id) BUCKETS 10 + PROPERTIES("replication_num" = "1"); + """ + + // load the map data from csv file + streamLoad { + table testTable + + file dataFile // import csv file + time 10000 // limit inflight 10s + + // if declared a check callback, the default check condition will ignore. + // So you must check all condition + check { result, exception, startTime, endTime -> + if (exception != null) { + throw exception + } + log.info("Stream load result: ${result}".toString()) + def json = parseJson(result) + assertEquals("success", json.Status.toLowerCase()) + assertEquals("OK", json.Message) + assertEquals(15, json.NumberTotalRows) + assertTrue(json.LoadBytes > 0) + + } + } + + // check result + qt_select "SELECT * FROM ${testTable} ORDER BY id" + + // insert into valid json rows + sql """INSERT INTO ${testTable} VALUES(12, NULL)""" + sql """INSERT INTO ${testTable} VALUES(13, {"k1":100, "k2": 130})""" + + // map element_at + qt_select "SELECT m['k2'] FROM ${testTable} ORDER BY id" +} diff --git a/regression-test/suites/query_p0/show/test_map_show_create.groovy b/regression-test/suites/query_p0/show/test_map_show_create.groovy new file mode 100644 index 0000000000..463cb2babc --- /dev/null +++ b/regression-test/suites/query_p0/show/test_map_show_create.groovy @@ -0,0 +1,69 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_map_show_create", "query") { + // define a sql table + def testTable = "test_map_show_create" + + def create_test_table = {testTablex -> + def result1 = sql """ + CREATE TABLE IF NOT EXISTS ${testTable} ( + `k1` INT(11) NULL, + `k2` MAP<SMALLINT(6), STRING> NULL, + `k3` MAP<INT(11), STRING> NULL, + `k4` MAP<DATE, INT> NULL, + `k5` MAP<DATETIME, STRING> NULL, + `k6` Map<FLOAT, INT> NULL + ) ENGINE=OLAP + DUPLICATE KEY(`k1`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`k1`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "in_memory" = "false", + "storage_format" = "V2", + "disable_auto_compaction" = "false" + ) + """ + + // DDL/DML return 1 row and 3 column, the only value is update row count + assertTrue(result1.size() == 1) + assertTrue(result1[0].size() == 1) + assertTrue(result1[0][0] == 0, "Create table should update 0 rows") + + // insert 1 row to check whether the table is ok + def result2 = sql """ INSERT INTO ${testTable} VALUES (100, {1: '1', 2: '2', 3:'3'}, + {32767: '32767', 32768: '32768', 32769: '32769'}, {'2022-07-13': 1}, + {'2022-07-13 12:30:00': '2022-07-13 12:30:00'}, {0.33: 33, 0.67: 67}) + """ + assertTrue(result2.size() == 1) + assertTrue(result2[0].size() == 1) + assertTrue(result2[0][0] == 1, "Insert should update 1 rows") + } + + try { + sql "DROP TABLE IF EXISTS ${testTable}" + sql "ADMIN SET FRONTEND CONFIG ('enable_map_type' = 'true')" + + create_test_table.call(testTable) + + qt_select "SHOW CREATE TABLE ${testTable}" + } finally { + try_sql("DROP TABLE IF EXISTS ${testTable}") + } + +} --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org