This is an automated email from the ASF dual-hosted git repository.

diwu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris-spark-connector.git


The following commit(s) were added to refs/heads/master by this push:
     new 9a4464b  [improvement] Escaping special characters (#118)
9a4464b is described below

commit 9a4464bf63ca1af5ca82d95e0ab6d2e136de49f6
Author: gnehil <adamlee...@gmail.com>
AuthorDate: Tue Jul 18 14:33:37 2023 +0800

    [improvement] Escaping special characters (#118)
---
 .../apache/doris/spark/load/DorisStreamLoad.java   |  6 ++--
 .../org/apache/doris/spark/util/EscapeHandler.java | 40 ++++++++++++++++++++++
 .../apache/doris/spark/util/EscapeHandlerTest.java | 36 +++++++++++++++++++
 3 files changed, 80 insertions(+), 2 deletions(-)

diff --git 
a/spark-doris-connector/src/main/java/org/apache/doris/spark/load/DorisStreamLoad.java
 
b/spark-doris-connector/src/main/java/org/apache/doris/spark/load/DorisStreamLoad.java
index 61379e3..07e6624 100644
--- 
a/spark-doris-connector/src/main/java/org/apache/doris/spark/load/DorisStreamLoad.java
+++ 
b/spark-doris-connector/src/main/java/org/apache/doris/spark/load/DorisStreamLoad.java
@@ -14,6 +14,7 @@
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.
+
 package org.apache.doris.spark.load;
 
 import org.apache.doris.spark.cfg.ConfigurationOptions;
@@ -22,6 +23,7 @@ import org.apache.doris.spark.exception.StreamLoadException;
 import org.apache.doris.spark.rest.RestService;
 import org.apache.doris.spark.rest.models.BackendV2;
 import org.apache.doris.spark.rest.models.RespContent;
+import org.apache.doris.spark.util.EscapeHandler;
 import org.apache.doris.spark.util.ListUtils;
 
 import com.fasterxml.jackson.core.JsonProcessingException;
@@ -102,9 +104,9 @@ public class DorisStreamLoad implements Serializable {
                 .build(new BackendCacheLoader(settings));
         fileType = streamLoadProp.getOrDefault("format", "csv");
         if ("csv".equals(fileType)){
-            FIELD_DELIMITER = streamLoadProp.getOrDefault("column_separator", 
"\t");
-            LINE_DELIMITER = streamLoadProp.getOrDefault("line_delimiter", 
"\n");
+            FIELD_DELIMITER = 
EscapeHandler.escapeString(streamLoadProp.getOrDefault("column_separator", 
"\t"));
         }
+        LINE_DELIMITER = 
EscapeHandler.escapeString(streamLoadProp.getOrDefault("line_delimiter", "\n"));
     }
 
     public String getLoadUrlStr() {
diff --git 
a/spark-doris-connector/src/main/java/org/apache/doris/spark/util/EscapeHandler.java
 
b/spark-doris-connector/src/main/java/org/apache/doris/spark/util/EscapeHandler.java
new file mode 100644
index 0000000..87a3989
--- /dev/null
+++ 
b/spark-doris-connector/src/main/java/org/apache/doris/spark/util/EscapeHandler.java
@@ -0,0 +1,40 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.spark.util;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+public class EscapeHandler {
+    public static final String ESCAPE_DELIMITERS_FLAGS = "\\x";
+    public static final Pattern ESCAPE_PATTERN = 
Pattern.compile("\\\\x([0-9|a-f|A-F]{2})");
+
+    public static String escapeString(String source) {
+        if (source.contains(ESCAPE_DELIMITERS_FLAGS)) {
+            Matcher m = ESCAPE_PATTERN.matcher(source);
+            StringBuffer buf = new StringBuffer();
+            while (m.find()) {
+                m.appendReplacement(buf, String.format("%s", (char) 
Integer.parseInt(m.group(1), 16)));
+            }
+            m.appendTail(buf);
+            return buf.toString();
+        }
+        return source;
+    }
+
+}
\ No newline at end of file
diff --git 
a/spark-doris-connector/src/test/java/org/apache/doris/spark/util/EscapeHandlerTest.java
 
b/spark-doris-connector/src/test/java/org/apache/doris/spark/util/EscapeHandlerTest.java
new file mode 100644
index 0000000..d8fb270
--- /dev/null
+++ 
b/spark-doris-connector/src/test/java/org/apache/doris/spark/util/EscapeHandlerTest.java
@@ -0,0 +1,36 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.spark.util;
+
+import junit.framework.TestCase;
+import org.junit.Assert;
+
+import java.util.Properties;
+
+public class EscapeHandlerTest extends TestCase {
+
+    public void testEscapeString() {
+
+
+        String s1 = "\\x09\\x09";
+        String s2 = "\\x0A\\x0A";
+        Assert.assertEquals("\t\t", EscapeHandler.escapeString(s1));
+        Assert.assertEquals("\n\n", EscapeHandler.escapeString(s2));
+
+    }
+}
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to