This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new e92dc19f7f6 [fix](hive) add support for quoteChar and seperatorChar
for hive (branch-2.0) (#28703)#28703
e92dc19f7f6 is described below
commit e92dc19f7f6823b9d6d0cb622aa8de6309c0d515
Author: wuwenchi <[email protected]>
AuthorDate: Thu Dec 21 19:03:13 2023 +0800
[fix](hive) add support for quoteChar and seperatorChar for hive
(branch-2.0) (#28703)#28703
bp #28613
---
.../hive/scripts/create_preinstalled_table.hql | 8 +++++
.../doris/planner/external/HiveScanNode.java | 17 +++++++++-
.../hive/test_hive_serde_prop.out | 4 +++
.../hive/test_hive_serde_prop.groovy | 36 ++++++++++++++++++++++
4 files changed, 64 insertions(+), 1 deletion(-)
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql
b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql
index 4e80d7466d2..e798ecd7f2b 100644
---
a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql
+++
b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql
@@ -1798,3 +1798,11 @@ create table stats_test2 (id INT, value STRING) STORED
AS PARQUET;
insert into stats_test1 values (1, 'name1'), (2, 'name2'), (3, 'name3');
INSERT INTO stats_test2 VALUES (1, ';'), (2, '\*');
+
+create table employee_gz(name string,salary string)
+row format serde 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
+with serdeproperties
+('quoteChar'='\"'
+,'seperatorChar'=',');
+
+insert into employee_gz values ('a', '1.1'), ('b', '2.2');
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveScanNode.java
b/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveScanNode.java
index 943d30017e7..58b93112477 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveScanNode.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveScanNode.java
@@ -76,6 +76,9 @@ public class HiveScanNode extends FileQueryScanNode {
public static final String DEFAULT_FIELD_DELIMITER = "\1"; // "\x01"
public static final String PROP_LINE_DELIMITER = "line.delim";
public static final String DEFAULT_LINE_DELIMITER = "\n";
+ public static final String PROP_SEPERATOR_CHAR = "seperatorChar";
+ public static final String PROP_QUOTA_CHAR = "quoteChar";
+
public static final String PROP_COLLECTION_DELIMITER_HIVE2 =
"colelction.delim";
public static final String PROP_COLLECTION_DELIMITER_HIVE3 =
"collection.delim";
@@ -362,7 +365,16 @@ public class HiveScanNode extends FileQueryScanNode {
protected TFileAttributes getFileAttributes() throws UserException {
TFileTextScanRangeParams textParams = new TFileTextScanRangeParams();
java.util.Map<String, String> delimiter =
hmsTable.getRemoteTable().getSd().getSerdeInfo().getParameters();
-
textParams.setColumnSeparator(delimiter.getOrDefault(PROP_FIELD_DELIMITER,
DEFAULT_FIELD_DELIMITER));
+ if (delimiter.containsKey(PROP_FIELD_DELIMITER)) {
+ textParams.setColumnSeparator(delimiter.get(PROP_FIELD_DELIMITER));
+ } else if (delimiter.containsKey(PROP_SEPERATOR_CHAR)) {
+ textParams.setColumnSeparator(delimiter.get(PROP_SEPERATOR_CHAR));
+ } else {
+ textParams.setColumnSeparator(DEFAULT_FIELD_DELIMITER);
+ }
+ if (delimiter.containsKey(PROP_QUOTA_CHAR)) {
+
textParams.setEnclose(delimiter.get(PROP_QUOTA_CHAR).getBytes()[0]);
+ }
textParams.setLineDelimiter(delimiter.getOrDefault(PROP_LINE_DELIMITER,
DEFAULT_LINE_DELIMITER));
textParams.setMapkvDelimiter(delimiter.getOrDefault(PROP_MAP_KV_DELIMITER,
DEFAULT_MAP_KV_DELIMITER));
@@ -377,6 +389,9 @@ public class HiveScanNode extends FileQueryScanNode {
TFileAttributes fileAttributes = new TFileAttributes();
fileAttributes.setTextParams(textParams);
fileAttributes.setHeaderType("");
+ if (textParams.isSet(TFileTextScanRangeParams._Fields.ENCLOSE)) {
+ fileAttributes.setTrimDoubleQuotes(true);
+ }
return fileAttributes;
}
diff --git
a/regression-test/data/external_table_p0/hive/test_hive_serde_prop.out
b/regression-test/data/external_table_p0/hive/test_hive_serde_prop.out
new file mode 100644
index 00000000000..1cde2baec27
--- /dev/null
+++ b/regression-test/data/external_table_p0/hive/test_hive_serde_prop.out
@@ -0,0 +1,4 @@
+test_hive_serde_prop.out -- This file is automatically generated. You should
know what you did if you want to edit this
+-- !1 --
+a 1.1
+b 2.2
diff --git
a/regression-test/suites/external_table_p0/hive/test_hive_serde_prop.groovy
b/regression-test/suites/external_table_p0/hive/test_hive_serde_prop.groovy
new file mode 100644
index 00000000000..41d7056d208
--- /dev/null
+++ b/regression-test/suites/external_table_p0/hive/test_hive_serde_prop.groovy
@@ -0,0 +1,36 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_hive_serde_prop",
"external_docker,hive,external_docker_hive,p0,external") {
+ String enabled = context.config.otherConfigs.get("enableHiveTest")
+ if (enabled != null && enabled.equalsIgnoreCase("true")) {
+ String catalog_name = "test_hive_serde_prop"
+ String ex_db_name = "`stats_test`"
+ String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
+ String hms_port = context.config.otherConfigs.get("hms_port")
+
+ sql """drop catalog if exists ${catalog_name} """
+
+ sql """CREATE CATALOG ${catalog_name} PROPERTIES (
+ 'type'='hms',
+ 'hive.metastore.uris' =
'thrift://${externalEnvIp}:${hms_port}',
+ 'hadoop.username' = 'hive'
+ );"""
+
+ qt_1 """select * from ${catalog_name}.${ex_db_name}.employee_gz
order by name;"""
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]