This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new bae9d8d7f2 [Feature-Wip](MySQL LOAD)Add trim quotes property for mysql load (#17775) bae9d8d7f2 is described below commit bae9d8d7f2da0e7cf9cc6a06a788eeaaf80fae9b Author: huangzhaowei <huangzhaowei....@bytedance.com> AuthorDate: Tue Mar 21 00:32:58 2023 +0800 [Feature-Wip](MySQL LOAD)Add trim quotes property for mysql load (#17775) Add trim quotes property for mysql load to trim double quotes in the load files. --- .../Load/MYSQL-LOAD.md | 2 + .../Load/MYSQL-LOAD.md | 2 + .../apache/doris/load/loadv2/MysqlLoadManager.java | 6 ++ .../load_p0/mysql_load/mysql_load_trim_quotes.csv | 4 ++ .../mysql_load/test_mysql_load_trim_quotes.out | 13 +++++ .../mysql_load/test_mysql_load_trim_quotes.groovy | 67 ++++++++++++++++++++++ 6 files changed, 94 insertions(+) diff --git a/docs/en/docs/sql-manual/sql-reference/Data-Manipulation-Statements/Load/MYSQL-LOAD.md b/docs/en/docs/sql-manual/sql-reference/Data-Manipulation-Statements/Load/MYSQL-LOAD.md index bff91fe328..69c6fe5053 100644 --- a/docs/en/docs/sql-manual/sql-reference/Data-Manipulation-Statements/Load/MYSQL-LOAD.md +++ b/docs/en/docs/sql-manual/sql-reference/Data-Manipulation-Statements/Load/MYSQL-LOAD.md @@ -77,6 +77,8 @@ This import method can still guarantee the atomicity of a batch of import tasks, 5. exec_mem_limit: Import memory limit. Default is 2GB. The unit is bytes. +6. trim_double_quotes: Boolean type. The default value is false. True means that the outermost double quotes of each field in the load file are trimmed. + ### Example 1. Import the data from the client side local file `testData` into the table `testTbl` in the database `testDb`. 
Specify a timeout of 100 seconds diff --git a/docs/zh-CN/docs/sql-manual/sql-reference/Data-Manipulation-Statements/Load/MYSQL-LOAD.md b/docs/zh-CN/docs/sql-manual/sql-reference/Data-Manipulation-Statements/Load/MYSQL-LOAD.md index bc660d6082..b5d6e852b1 100644 --- a/docs/zh-CN/docs/sql-manual/sql-reference/Data-Manipulation-Statements/Load/MYSQL-LOAD.md +++ b/docs/zh-CN/docs/sql-manual/sql-reference/Data-Manipulation-Statements/Load/MYSQL-LOAD.md @@ -76,6 +76,8 @@ INTO TABLE tbl_name 5. exec_mem_limit: 导入内存限制。默认为 2GB。单位为字节。 +6. trim_double_quotes: 布尔类型,默认值为 false,为 true 时表示裁剪掉导入文件每个字段最外层的双引号。 + ### Example 1. 将客户端本地文件'testData'中的数据导入到数据库'testDb'中'testTbl'的表。指定超时时间为 100 秒 diff --git a/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/MysqlLoadManager.java b/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/MysqlLoadManager.java index 44ceb32bad..e57051cb1a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/MysqlLoadManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/MysqlLoadManager.java @@ -300,6 +300,12 @@ public class MysqlLoadManager { String timezone = props.get(LoadStmt.TIMEZONE); httpPut.addHeader(LoadStmt.TIMEZONE, timezone); } + + // trim quotes + if (props.containsKey(LoadStmt.KEY_TRIM_DOUBLE_QUOTES)) { + String trimQuotes = props.get(LoadStmt.KEY_TRIM_DOUBLE_QUOTES); + httpPut.addHeader(LoadStmt.KEY_TRIM_DOUBLE_QUOTES, trimQuotes); + } } // skip_lines diff --git a/regression-test/data/load_p0/mysql_load/mysql_load_trim_quotes.csv b/regression-test/data/load_p0/mysql_load/mysql_load_trim_quotes.csv new file mode 100644 index 0000000000..a1aa22cc84 --- /dev/null +++ b/regression-test/data/load_p0/mysql_load/mysql_load_trim_quotes.csv @@ -0,0 +1,4 @@ +1,2,3,"abc",2022-12-01,2022-12-01:09:30:31 +2,3,3,abc,2022-12-01,2022-12-01:09:30:31 +3,4,3,"abc",2022-12-01,2022-12-01:09:30:31 +4,5,3,abc,2022-12-01,2022-12-01:09:30:31 diff --git a/regression-test/data/load_p0/mysql_load/test_mysql_load_trim_quotes.out 
b/regression-test/data/load_p0/mysql_load/test_mysql_load_trim_quotes.out new file mode 100644 index 0000000000..ad49847d99 --- /dev/null +++ b/regression-test/data/load_p0/mysql_load/test_mysql_load_trim_quotes.out @@ -0,0 +1,13 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !sql -- +1 2 3 abc 2022-12-01 2022-12-01T09:30:31 +2 3 3 abc 2022-12-01 2022-12-01T09:30:31 +3 4 3 abc 2022-12-01 2022-12-01T09:30:31 +4 5 3 abc 2022-12-01 2022-12-01T09:30:31 + +-- !sql -- +1 2 3 "abc" 2022-12-01 2022-12-01T09:30:31 +2 3 3 abc 2022-12-01 2022-12-01T09:30:31 +3 4 3 "abc" 2022-12-01 2022-12-01T09:30:31 +4 5 3 abc 2022-12-01 2022-12-01T09:30:31 + diff --git a/regression-test/suites/load_p0/mysql_load/test_mysql_load_trim_quotes.groovy b/regression-test/suites/load_p0/mysql_load/test_mysql_load_trim_quotes.groovy new file mode 100644 index 0000000000..2004088bb1 --- /dev/null +++ b/regression-test/suites/load_p0/mysql_load/test_mysql_load_trim_quotes.groovy @@ -0,0 +1,67 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("test_mysql_load_trim_quotes", "p0") { + sql "show tables" + + def tableName = "test_mysql_load_trim_quotes" + + sql """ DROP TABLE IF EXISTS ${tableName} """ + sql """ + CREATE TABLE IF NOT EXISTS ${tableName} ( + `k1` int(20) NULL, + `k2` bigint(20) NULL, + `v1` tinyint(4) NULL, + `v2` string NULL, + `v3` date NULL, + `v4` datetime NULL + ) ENGINE=OLAP + DUPLICATE KEY(`k1`, `k2`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`k1`, `k2`) BUCKETS 3 + PROPERTIES ("replication_allocation" = "tag.location.default: 1"); + """ + + def mysql_load_skip_lines = getLoalFilePath "mysql_load_trim_quotes.csv" + + // trim quotes + sql """ + LOAD DATA + LOCAL + INFILE '${mysql_load_skip_lines}' + INTO TABLE ${tableName} + COLUMNS TERMINATED BY ',' + PROPERTIES ("trim_double_quotes"="true"); + """ + + sql "sync" + qt_sql "select * from ${tableName} order by k1, k2" + + // double quotes + sql """truncate table ${tableName}""" + sql """ + LOAD DATA + LOCAL + INFILE '${mysql_load_skip_lines}' + INTO TABLE ${tableName} + COLUMNS TERMINATED BY ',' + """ + + sql "sync" + qt_sql "select * from ${tableName} order by k1, k2" +} + --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org