This is an automated email from the ASF dual-hosted git repository.

xuyang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
     new 5f6f35e886 Add the supported sub-type for array (#10824)
5f6f35e886 is described below

commit 5f6f35e88664eae4033ee6ed5b17e91e111dac68
Author: carlvinhust2012 <huchengha...@126.com>
AuthorDate: Thu Jul 21 16:29:17 2022 +0800

    Add the supported sub-type for array (#10824)

    1. This PR adds the supported sub-types for the ARRAY type, which were extended in #9916.
    2. Add regression tests for the supported sub-types.

    Co-authored-by: hucheng01 <huchen...@baidu.com>
---
 .../java/org/apache/doris/analysis/TypeDef.java    |  14 +-
 .../data/load/broker_load/simple_array.json        |   5 +
 .../data/load/broker_load/simple_object_array.json |   5 +
 .../data/load/broker_load/test_array_load.out      |  33 ++++
 .../suites/load/broker_load/test_array_load.groovy | 198 +++++++++++++++++++++
 5 files changed, 252 insertions(+), 3 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/TypeDef.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/TypeDef.java
index 91ce563896..7af29a8fc2 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/TypeDef.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/TypeDef.java
@@ -117,10 +117,18 @@ public class TypeDef implements ParseNode {
         if (type.isNull()) {
             throw new AnalysisException("Unsupported data type: " + type.toSql());
         }
-        if (!type.getPrimitiveType().isIntegerType()
-                && !type.getPrimitiveType().isCharFamily()) {
-            throw new AnalysisException("Array column just support INT/VARCHAR sub-type");
+        // check whether the array sub-type is supported
+        Boolean isSupportType = false;
+        for (Type subType : Type.getArraySubTypes()) {
+            if (type.getPrimitiveType() == subType.getPrimitiveType()) {
+                isSupportType = true;
+                break;
+            }
         }
+        if (!isSupportType) {
+            throw new AnalysisException("Array unsupported sub-type: " + type.toSql());
+        }
+
         if (type.getPrimitiveType().isStringType()
                 && !type.isAssignedStrLenInColDefinition()) {
             type.setLength(1);
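The change above replaces the hard-coded INT/VARCHAR check with a scan of the whitelist returned by Type.getArraySubTypes(), so any sub-type on that list is accepted. A minimal standalone sketch of the same membership check follows; the SubType enum, the SUPPORTED set, and the HLL/BITMAP exclusions are hypothetical stand-ins for the FE's Type/PrimitiveType machinery, not the actual Doris classes:

    import java.util.EnumSet;
    import java.util.Set;

    public class ArraySubTypeCheckSketch {
        // Hypothetical stand-in for PrimitiveType; the real whitelist is whatever
        // Type.getArraySubTypes() returns in the Doris FE.
        enum SubType { TINYINT, SMALLINT, INT, BIGINT, LARGEINT, FLOAT, DOUBLE,
                       DECIMAL, DATE, DATETIME, CHAR, VARCHAR, HLL, BITMAP }

        // Assumed here: everything except HLL/BITMAP is array-compatible. An EnumSet
        // gives O(1) membership tests; the patch's linear scan is equivalent for a
        // list this small.
        private static final Set<SubType> SUPPORTED =
                EnumSet.complementOf(EnumSet.of(SubType.HLL, SubType.BITMAP));

        static void check(SubType subType) {
            if (!SUPPORTED.contains(subType)) {
                throw new IllegalArgumentException("Array unsupported sub-type: " + subType);
            }
        }

        public static void main(String[] args) {
            check(SubType.DECIMAL); // accepted now; the old check allowed only INT/VARCHAR families
            check(SubType.HLL);     // throws: not on the whitelist
        }
    }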
diff --git a/regression-test/data/load/broker_load/simple_array.json b/regression-test/data/load/broker_load/simple_array.json
new file mode 100644
index 0000000000..15fc2a3964
--- /dev/null
+++ b/regression-test/data/load/broker_load/simple_array.json
@@ -0,0 +1,5 @@
+[{"k1": 1, "k2": [1,2,3,4,5], "k3": [32767,32768,32769], "k4": [65534,65535,65536], "k5": ["a","b","c","d","e"], "k6": ["hello","world"], "k7": ["1991-01-01"], "k8": ["1991-01-01 00:00:00"], "k9": [0.33,0.67], "k10": [3.1415926,0.878787878], "k11": [1,1.2,1.3]},
+{"k1": 2, "k2": [6,7,8,9,10], "k3": [32767,32768,32769], "k4": [65534,65535,65536], "k5": ["a","b","c","d","e"], "k6": ["hello","world"], "k7": ["1991-01-01"], "k8": ["1991-01-01 00:00:00"], "k9": [0.33,0.67], "k10": [3.1415926,0.878787878], "k11": [1,1.2,1.3]},
+{"k1": 3, "k2": [], "k3": [32767,32768,32769], "k4": [null,null,65536], "k5": ["a","b","c","d","e"], "k6": ["happy","birthday"], "k7": ["1991-01-01"], "k8": ["1991-01-01 00:00:00"], "k9": [0.33,0.67], "k10": [3.1415926,0.878787878], "k11": [1,1.2,1.3]},
+{"k1": 4, "k2": [null], "k3": [32767,32768,32769], "k4": [ null,null,65536], "k5": ["a","b","c","d","e"], "k6": ["hello","world"], "k7": ["1991-01-01"], "k8": ["1991-01-01 00:00:00"], "k9": [0.33,0.67], "k10": [3.1415926,0.878787878], "k11": [1,1.2,1.3]},
+{"k1": 5, "k2": [null,null], "k3": [32767,32768,null], "k4": [65534,null,65536], "k5": ["a","b","c","d","e"], "k6": ["hello","world"], "k7": ["1991-01-01"], "k8": ["1991-01-01 00:00:00"], "k9": [0.33,0.67], "k10": [3.1415926,0.878787878], "k11": [1,1.2,1.3]}]
\ No newline at end of file
diff --git a/regression-test/data/load/broker_load/simple_object_array.json b/regression-test/data/load/broker_load/simple_object_array.json
new file mode 100644
index 0000000000..ca57e52676
--- /dev/null
+++ b/regression-test/data/load/broker_load/simple_object_array.json
@@ -0,0 +1,5 @@
+{"k1": 1, "k2": [1,2,3,4,5], "k3": [32767,32768,32769], "k4": [65534,65535,65536], "k5": ["a","b","c","d","e"], "k6": ["hello","world"], "k7": ["1991-01-01"], "k8": ["1991-01-01 00:00:00"], "k9": [0.33,0.67], "k10": [3.1415926,0.878787878], "k11": [1,1.2,1.3]}
+{"k1": 2, "k2": [6,7,8,9,10], "k3": [32767,32768,32769], "k4": [65534,65535,65536], "k5": ["a","b","c","d","e"], "k6": ["hello","world"], "k7": ["1991-01-01"], "k8": ["1991-01-01 00:00:00"], "k9": [0.33,0.67], "k10": [3.1415926,0.878787878], "k11": [1,1.2,1.3]}
+{"k1": 3, "k2": [], "k3": [32767,32768,32769], "k4": [null,null,65536], "k5": ["a","b","c","d","e"], "k6": ["happy","birthday"], "k7": ["1991-01-01"], "k8": ["1991-01-01 00:00:00"], "k9": [0.33,0.67], "k10": [3.1415926,0.878787878], "k11": [1,1.2,1.3]}
+{"k1": 4, "k2": [null], "k3": [32767,32768,32769], "k4": [ null,null,65536], "k5": ["a","b","c","d","e"], "k6": ["hello","world"], "k7": ["1991-01-01"], "k8": ["1991-01-01 00:00:00"], "k9": [0.33,0.67], "k10": [3.1415926,0.878787878], "k11": [1,1.2,1.3]}
+{"k1": 5, "k2": [null,null], "k3": [32767,32768,null], "k4": [65534,null,65536], "k5": ["a","b","c","d","e"], "k6": ["hello","world"], "k7": ["1991-01-01"], "k8": ["1991-01-01 00:00:00"], "k9": [0.33,0.67], "k10": [3.1415926,0.878787878], "k11": [1,1.2,1.3]}
\ No newline at end of file
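The two fixtures above carry identical rows but differ in layout: simple_array.json wraps all rows in one outer JSON array (the suite loads it with strip_outer_array=true), while simple_object_array.json holds one self-contained object per line, the newline-delimited style fed to the broker-load cases. A rough sketch of how the two layouts are consumed, using Jackson purely for illustration (the Doris BE has its own JSON reader):

    import com.fasterxml.jackson.databind.JsonNode;
    import com.fasterxml.jackson.databind.ObjectMapper;
    import java.nio.file.Files;
    import java.nio.file.Path;

    public class JsonLayoutSketch {
        public static void main(String[] args) throws Exception {
            ObjectMapper mapper = new ObjectMapper();

            // simple_array.json: the whole file is a single array; "stripping the
            // outer array" means treating each element as one row.
            JsonNode outer = mapper.readTree(Files.readString(Path.of("simple_array.json")));
            for (JsonNode row : outer) {
                System.out.println("array-layout row, k1=" + row.get("k1").asInt());
            }

            // simple_object_array.json: one object per line, so each line parses
            // independently.
            for (String line : Files.readAllLines(Path.of("simple_object_array.json"))) {
                JsonNode row = mapper.readTree(line);
                System.out.println("line-layout row, k1=" + row.get("k1").asInt());
            }
        }
    }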
diff --git a/regression-test/data/load/broker_load/test_array_load.out b/regression-test/data/load/broker_load/test_array_load.out
new file mode 100644
index 0000000000..e6189ea55d
--- /dev/null
+++ b/regression-test/data/load/broker_load/test_array_load.out
@@ -0,0 +1,33 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !select --
+1 [1, 2, 3, 4, 5] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3]
+2 [6, 7, 8, 9, 10] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3]
+3 [] [32767, 32768, 32769] [NULL, NULL, 65536] ['a', 'b', 'c', 'd', 'e'] ['happy', 'birthday'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3]
+4 [NULL] [32767, 32768, 32769] [NULL, NULL, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3]
+5 [NULL, NULL] [32767, 32768, NULL] [65534, NULL, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3]
+100 [1, 2, 3] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c'] ['hello', 'world'] [2022-07-13] [2022-07-13 12:30:00] [0.33, 0.67] [3.1415926, 0.878787878] [4, 5.5, 6.67]
+
+-- !select --
+1 [1, 2, 3, 4, 5] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3]
+2 [6, 7, 8, 9, 10] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3]
+3 [] [32767, 32768, 32769] [NULL, NULL, 65536] ['a', 'b', 'c', 'd', 'e'] ['happy', 'birthday'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3]
+4 [NULL] [32767, 32768, 32769] [NULL, NULL, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3]
+5 [NULL, NULL] [32767, 32768, NULL] [65534, NULL, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3]
+100 [1, 2, 3] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c'] ['hello', 'world'] [2022-07-13] [2022-07-13 12:30:00] [0.33, 0.67] [3.1415926, 0.878787878] [4, 5.5, 6.67]
+
+-- !select --
+1 [1, 2, 3, 4, 5] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3]
+2 [6, 7, 8, 9, 10] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3]
+3 [] [32767, 32768, 32769] [NULL, NULL, 65536] ['a', 'b', 'c', 'd', 'e'] ['happy', 'birthday'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3]
+4 [NULL] [32767, 32768, 32769] [NULL, NULL, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3]
+5 [NULL, NULL] [32767, 32768, NULL] [65534, NULL, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3]
+100 [1, 2, 3] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c'] ['hello', 'world'] [2022-07-13] [2022-07-13 12:30:00] [0.33, 0.67] [3.1415926, 0.878787878] [4, 5.5, 6.67]
+
+-- !select --
+1 [1, 2, 3, 4, 5] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3]
+2 [6, 7, 8, 9, 10] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3]
+3 [] [32767, 32768, 32769] [NULL, NULL, 65536] ['a', 'b', 'c', 'd', 'e'] ['happy', 'birthday'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3]
+4 [NULL] [32767, 32768, 32769] [NULL, NULL, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3]
+5 [NULL, NULL] [32767, 32768, NULL] [65534, NULL, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3]
+100 [1, 2, 3] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c'] ['hello', 'world'] [2022-07-13] [2022-07-13 12:30:00] [0.33, 0.67] [3.1415926, 0.878787878] [4, 5.5, 6.67]
+
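The expected output above contains one block per qt_select in the Groovy suite that follows. That suite's streamLoad block is the regression framework's DSL over Doris' stream-load HTTP API, where options such as format and strip_outer_array travel as request headers. As a rough standalone sketch of the raw call (the host 127.0.0.1:8030, the database test_db, and a root user with an empty password are placeholder assumptions, not values from this commit):

    import java.net.URI;
    import java.net.http.HttpClient;
    import java.net.http.HttpRequest;
    import java.net.http.HttpResponse;
    import java.nio.file.Path;
    import java.util.Base64;

    public class StreamLoadSketch {
        public static void main(String[] args) throws Exception {
            // The FE answers the PUT with a redirect to a BE, so follow redirects.
            HttpClient client = HttpClient.newBuilder()
                    .followRedirects(HttpClient.Redirect.ALWAYS)
                    .build();

            String auth = Base64.getEncoder().encodeToString("root:".getBytes());
            HttpRequest request = HttpRequest.newBuilder()
                    .uri(URI.create("http://127.0.0.1:8030/api/test_db/tbl_test_array_load/_stream_load"))
                    .header("Authorization", "Basic " + auth)
                    .header("format", "json")              // same knobs the DSL sets with 'set'
                    .header("strip_outer_array", "true")
                    .expectContinue(true)                  // stream load uses 100-continue
                    .PUT(HttpRequest.BodyPublishers.ofFile(Path.of("simple_array.json")))
                    .build();

            HttpResponse<String> response = client.send(request, HttpResponse.BodyHandlers.ofString());
            // The body is JSON carrying Status, NumberTotalRows, NumberLoadedRows,
            // LoadBytes, etc. -- the fields the suite's check callback asserts on.
            System.out.println(response.body());
        }
    }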
diff --git a/regression-test/suites/load/broker_load/test_array_load.groovy b/regression-test/suites/load/broker_load/test_array_load.groovy
new file mode 100644
index 0000000000..d74dd5cbe7
--- /dev/null
+++ b/regression-test/suites/load/broker_load/test_array_load.groovy
@@ -0,0 +1,198 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_array_load", "load") {
+    // define a sql table
+    def testTable = "tbl_test_array_load"
+
+    def create_test_table = {testTablex, enable_vectorized_flag ->
+        // multi-line sql
+        sql """ set enable_array_type = true """
+
+        if (enable_vectorized_flag) {
+            sql """ set enable_vectorized_engine = true """
+        }
+
+        def result1 = sql """
+            CREATE TABLE IF NOT EXISTS ${testTable} (
+              `k1` INT(11) NULL COMMENT "",
+              `k2` ARRAY<SMALLINT> NOT NULL COMMENT "",
+              `k3` ARRAY<INT(11)> NOT NULL COMMENT "",
+              `k4` ARRAY<BIGINT> NOT NULL COMMENT "",
+              `k5` ARRAY<CHAR> NOT NULL COMMENT "",
+              `k6` ARRAY<VARCHAR(20)> NULL COMMENT "",
+              `k7` ARRAY<DATE> NOT NULL COMMENT "",
+              `k8` ARRAY<DATETIME> NOT NULL COMMENT "",
+              `k9` ARRAY<FLOAT> NOT NULL COMMENT "",
+              `k10` ARRAY<DOUBLE> NOT NULL COMMENT "",
+              `k11` ARRAY<DECIMAL(20, 6)> NULL COMMENT ""
+            ) ENGINE=OLAP
+            DUPLICATE KEY(`k1`)
+            DISTRIBUTED BY HASH(`k1`) BUCKETS 1
+            PROPERTIES (
+              "replication_allocation" = "tag.location.default: 1",
+              "storage_format" = "V2"
+            )
+            """
+
+        // DDL/DML returns one row with one column; the only value is the affected row count
+        assertTrue(result1.size() == 1)
+        assertTrue(result1[0].size() == 1)
+        assertTrue(result1[0][0] == 0, "Create table should update 0 rows")
+
+        // insert 1 row to check whether the table is ok
+        def result2 = sql """ INSERT INTO ${testTable} VALUES
+            (100, [1, 2, 3], [32767, 32768, 32769], [65534, 65535, 65536], ['a', 'b', 'c'], ["hello", "world"],
+            ['2022-07-13'], ['2022-07-13 12:30:00'], [0.33, 0.67], [3.1415926, 0.878787878], [4, 5.5, 6.67])
+            """
+        assertTrue(result2.size() == 1)
+        assertTrue(result2[0].size() == 1)
+        assertTrue(result2[0][0] == 1, "Insert should update 1 row")
+    }
+
+    def load_array_data = {strip_flag, read_flag, format_flag, exprs, json_paths,
+                           json_root, where_expr, fuzzy_flag, file_name ->
+        // load the json data
+        streamLoad {
+            table "tbl_test_array_load"
+
+            // set http request header params
+            set 'strip_outer_array', strip_flag
+            set 'read_json_by_line', read_flag
+            set 'format', format_flag
+            set 'columns', exprs
+            set 'jsonpaths', json_paths
+            set 'json_root', json_root
+            set 'where', where_expr
+            set 'fuzzy_parse', fuzzy_flag
+            file file_name // import json file
+            time 10000 // limit inflight 10s
+
+            // If a check callback is declared, the default check conditions are ignored,
+            // so you must verify every condition yourself.
+            check { result, exception, startTime, endTime ->
+                if (exception != null) {
+                    throw exception
+                }
+                log.info("Stream load result: ${result}".toString())
+                def json = parseJson(result)
+                assertEquals("success", json.Status.toLowerCase())
+                assertEquals(json.NumberTotalRows, json.NumberLoadedRows + json.NumberUnselectedRows)
+                assertTrue(json.NumberLoadedRows > 0 && json.LoadBytes > 0)
+            }
+        }
+    }
+
+    def load_from_hdfs = {testTablex, label, hdfsFilePath, format, brokerName, hdfsUser, hdfsPasswd ->
+        def result1 = sql """
+            LOAD LABEL ${label} (
+                DATA INFILE("${hdfsFilePath}")
+                INTO TABLE ${testTablex}
+                FORMAT as "${format}")
+            with BROKER "${brokerName}" (
+                "username"="${hdfsUser}",
+                "password"="${hdfsPasswd}")
+            PROPERTIES (
+                "timeout"="1200",
+                "max_filter_ratio"="0.1");
+            """
+
+        assertTrue(result1.size() == 1)
+        assertTrue(result1[0].size() == 1)
+        assertTrue(result1[0][0] == 0, "Query OK, 0 rows affected")
+    }
+
+    // case1: import array data in json format and enable vectorized engine
+    try {
+        sql "DROP TABLE IF EXISTS ${testTable}"
+
+        create_test_table.call(testTable, true)
+
+        load_array_data.call('true', '', 'json', '', '', '', '', '', 'simple_array.json')
+
+        // select the table and check whether the data is correct
+        qt_select "select * from ${testTable} order by k1"
+
+    } finally {
+        try_sql("DROP TABLE IF EXISTS ${testTable}")
+    }
+
+    // case2: import array data in json format and disable vectorized engine
+    try {
+        sql "DROP TABLE IF EXISTS ${testTable}"
+
+        create_test_table.call(testTable, false)
+
+        load_array_data.call('true', '', 'json', '', '', '', '', '', 'simple_array.json')
+
+        // select the table and check whether the data is correct
+        qt_select "select * from ${testTable} order by k1"
+
+    } finally {
+        try_sql("DROP TABLE IF EXISTS ${testTable}")
+    }
+
+    // If 'enableHdfs' in regression-conf.groovy has been set to true,
+    // the cases below will run.
+    if (enableHdfs()) {
+        brokerName = getBrokerName()
+        hdfsUser = getHdfsUser()
+        hdfsPasswd = getHdfsPasswd()
+        def hdfs_file_path = uploadToHdfs "broker_load/simple_object_array.json"
+        def format = "json"
+
+        // case3: import array data by hdfs and enable vectorized engine
+        try {
+            sql "DROP TABLE IF EXISTS ${testTable}"
+
+            create_test_table.call(testTable, true)
+
+            def test_load_label = UUID.randomUUID().toString().replaceAll("-", "")
+            load_from_hdfs.call(testTable, test_load_label, hdfs_file_path, format,
+                    brokerName, hdfsUser, hdfsPasswd)
+
+            // wait for the load to finish
+            sleep(5000)
+
+            // select the table and check whether the data is correct
+            qt_select "select * from ${testTable} order by k1"
+
+        } finally {
+            try_sql("DROP TABLE IF EXISTS ${testTable}")
+        }
+
+        // case4: import array data by hdfs and disable vectorized engine
+        try {
+            sql "DROP TABLE IF EXISTS ${testTable}"
+
+            create_test_table.call(testTable, false)
+
+            def test_load_label = UUID.randomUUID().toString().replaceAll("-", "")
+            load_from_hdfs.call(testTable, test_load_label, hdfs_file_path, format,
+                    brokerName, hdfsUser, hdfsPasswd)
+
+            // wait for the load to finish
+            sleep(5000)
+
+            // select the table and check whether the data is correct
+            qt_select "select * from ${testTable} order by k1"
+
+        } finally {
+            try_sql("DROP TABLE IF EXISTS ${testTable}")
+        }
+    }
+}
\ No newline at end of file

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org