This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 78a958467f [improvement](Load) Make broker load support the properties of trim_double_quotes and skip_lines (#16622) 78a958467f is described below commit 78a958467fbb4cfd491071bca22df09cf581b595 Author: huangzhaowei <carlmartin...@gmail.com> AuthorDate: Sun Feb 12 16:52:59 2023 +0800 [improvement](Load) Make broker load support the properties of trim_double_quotes and skip_lines (#16622) `trim_double_quotes` and `skip_lines` were supported in stream load. So make it support broker load too. --- .../Data-Manipulation-Statements/Load/BROKER-LOAD.md | 18 ++++++++++++++++++ .../Data-Manipulation-Statements/Load/STREAM-LOAD.md | 2 ++ .../Data-Manipulation-Statements/Load/BROKER-LOAD.md | 18 ++++++++++++++++++ .../Data-Manipulation-Statements/Load/STREAM-LOAD.md | 1 + .../org/apache/doris/analysis/DataDescription.java | 7 +++++++ .../main/java/org/apache/doris/analysis/LoadStmt.java | 13 +++++++++++++ 6 files changed, 59 insertions(+) diff --git a/docs/en/docs/sql-manual/sql-reference/Data-Manipulation-Statements/Load/BROKER-LOAD.md b/docs/en/docs/sql-manual/sql-reference/Data-Manipulation-Statements/Load/BROKER-LOAD.md index 4aa1a1aff5..6665b87c83 100644 --- a/docs/en/docs/sql-manual/sql-reference/Data-Manipulation-Statements/Load/BROKER-LOAD.md +++ b/docs/en/docs/sql-manual/sql-reference/Data-Manipulation-Statements/Load/BROKER-LOAD.md @@ -477,6 +477,24 @@ WITH BROKER broker_name ) ``` +12. 
Load CSV data, trim double quotes, and skip the first 5 lines + + ```SQL + LOAD LABEL example_db.label12 + ( + DATA INFILE("cosn://my_bucket/input/file.csv") + INTO TABLE `my_table` + (k1, k2, k3) + PROPERTIES("trim_double_quotes" = "true", "skip_lines" = "5") + ) + WITH BROKER "broker_name" + ( + "fs.cosn.userinfo.secretId" = "xxx", + "fs.cosn.userinfo.secretKey" = "xxxx", + "fs.cosn.bucket.endpoint_suffix" = "cos.xxxxxxxxx.myqcloud.com" + ) + ``` + ### Keywords BROKER, LOAD diff --git a/docs/en/docs/sql-manual/sql-reference/Data-Manipulation-Statements/Load/STREAM-LOAD.md b/docs/en/docs/sql-manual/sql-reference/Data-Manipulation-Statements/Load/STREAM-LOAD.md index 897b6f5116..48c3d0284d 100644 --- a/docs/en/docs/sql-manual/sql-reference/Data-Manipulation-Statements/Load/STREAM-LOAD.md +++ b/docs/en/docs/sql-manual/sql-reference/Data-Manipulation-Statements/Load/STREAM-LOAD.md @@ -182,6 +182,8 @@ ERRORS: 25. trim_double_quotes: Boolean type, The default value is false. True means that the outermost double quotes of each field in the csv file are trimmed. +26. skip_lines: <version since="dev" type="inline"> Integer type, the default value is 0. It specifies the number of lines to skip at the head of the csv file. It has no effect when the format is `csv_with_names` or `csv_with_names_and_types`. </version> + ### Example 1. Import the data in the local file 'testData' into the table 'testTbl' in the database 'testDb', and use Label for deduplication. Specify a timeout of 100 seconds diff --git a/docs/zh-CN/docs/sql-manual/sql-reference/Data-Manipulation-Statements/Load/BROKER-LOAD.md b/docs/zh-CN/docs/sql-manual/sql-reference/Data-Manipulation-Statements/Load/BROKER-LOAD.md index 83838b00a3..03f73e5578 100644 --- a/docs/zh-CN/docs/sql-manual/sql-reference/Data-Manipulation-Statements/Load/BROKER-LOAD.md +++ b/docs/zh-CN/docs/sql-manual/sql-reference/Data-Manipulation-Statements/Load/BROKER-LOAD.md @@ -475,6 +475,24 @@ WITH BROKER broker_name ) ``` +12. 
导入CSV数据时去掉双引号, 并跳过前5行。 + + ```SQL + LOAD LABEL example_db.label12 + ( + DATA INFILE("cosn://my_bucket/input/file.csv") + INTO TABLE `my_table` + (k1, k2, k3) + PROPERTIES("trim_double_quotes" = "true", "skip_lines" = "5") + ) + WITH BROKER "broker_name" + ( + "fs.cosn.userinfo.secretId" = "xxx", + "fs.cosn.userinfo.secretKey" = "xxxx", + "fs.cosn.bucket.endpoint_suffix" = "cos.xxxxxxxxx.myqcloud.com" + ) + ``` + ### Keywords BROKER, LOAD diff --git a/docs/zh-CN/docs/sql-manual/sql-reference/Data-Manipulation-Statements/Load/STREAM-LOAD.md b/docs/zh-CN/docs/sql-manual/sql-reference/Data-Manipulation-Statements/Load/STREAM-LOAD.md index 444509b5c9..94ff15167c 100644 --- a/docs/zh-CN/docs/sql-manual/sql-reference/Data-Manipulation-Statements/Load/STREAM-LOAD.md +++ b/docs/zh-CN/docs/sql-manual/sql-reference/Data-Manipulation-Statements/Load/STREAM-LOAD.md @@ -179,6 +179,7 @@ ERRORS: 25. trim_double_quotes: 布尔类型,默认值为 false,为 true 时表示裁剪掉 csv 文件每个字段最外层的双引号。 +26. skip_lines: <version since="dev" type="inline"> 整数类型, 默认值为0, 含义为跳过csv文件的前几行. 当 format 设置为 `csv_with_names` 或 `csv_with_names_and_types` 时, 该参数会失效. </version> ### Example 1. 
将本地文件'testData'中的数据导入到数据库'testDb'中'testTbl'的表,使用Label用于去重。指定超时时间为 100 秒 diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/DataDescription.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/DataDescription.java index c78c47aa80..603ead9cbd 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/DataDescription.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/DataDescription.java @@ -938,6 +938,13 @@ public class DataDescription { if (analysisMap.containsKey(LoadStmt.KEY_IN_PARAM_NUM_AS_STRING)) { numAsString = Boolean.parseBoolean(analysisMap.get(LoadStmt.KEY_IN_PARAM_NUM_AS_STRING)); } + + if (analysisMap.containsKey(LoadStmt.KEY_TRIM_DOUBLE_QUOTES)) { + trimDoubleQuotes = Boolean.parseBoolean(analysisMap.get(LoadStmt.KEY_TRIM_DOUBLE_QUOTES)); + } + if (analysisMap.containsKey(LoadStmt.KEY_SKIP_LINES)) { + skipLines = Integer.parseInt(analysisMap.get(LoadStmt.KEY_SKIP_LINES)); + } } private void checkLoadPriv(String fullDbName) throws AnalysisException { diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/LoadStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/LoadStmt.java index 8a1a45f7b2..a96934e7dc 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/LoadStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/LoadStmt.java @@ -118,6 +118,7 @@ public class LoadStmt extends DdlStmt { public static final String KEY_IN_PARAM_SEQUENCE_COL = "sequence_col"; public static final String KEY_IN_PARAM_BACKEND_ID = "backend_id"; public static final String KEY_SKIP_LINES = "skip_lines"; + public static final String KEY_TRIM_DOUBLE_QUOTES = "trim_double_quotes"; private final LabelName label; private final List<DataDescription> dataDescriptions; @@ -192,6 +193,18 @@ public class LoadStmt extends DdlStmt { return Boolean.valueOf(s); } }) + .put(KEY_SKIP_LINES, new Function<String, Integer>() { + @Override + public @Nullable Integer apply(@Nullable String s) { + return Integer.valueOf(s); + 
} + }) + .put(KEY_TRIM_DOUBLE_QUOTES, new Function<String, Boolean>() { + @Override + public @Nullable Boolean apply(@Nullable String s) { + return Boolean.valueOf(s); + } + }) .build(); public LoadStmt(DataDescription dataDescription, Map<String, String> properties) { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org