This is an automated email from the ASF dual-hosted git repository.

aloyszhang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/inlong.git


The following commit(s) were added to refs/heads/master by this push:
     new 04971c37e9 [INLONG-10832][SDK] Transform SQL support Translate 
function (#10863)
04971c37e9 is described below

commit 04971c37e908771526b2ac7c888cc55badfeff27
Author: Huan Liang <coderliangh...@gmail.com>
AuthorDate: Wed Aug 28 16:03:07 2024 +0800

    [INLONG-10832][SDK] Transform SQL support Translate function (#10863)
    
    Co-authored-by: AloysZhang <aloyszh...@apache.org>
---
 .../process/function/TranslateFunction.java        | 109 +++++++++++++++++++++
 .../transform/process/operator/OperatorTools.java  |   4 +-
 .../TestTransformStringFunctionsProcessor.java     |  27 +++++
 3 files changed, 139 insertions(+), 1 deletion(-)

diff --git 
a/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/TranslateFunction.java
 
b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/TranslateFunction.java
new file mode 100644
index 0000000000..ce05eb0fc3
--- /dev/null
+++ 
b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/TranslateFunction.java
@@ -0,0 +1,109 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.sdk.transform.process.function;
+
+import org.apache.inlong.sdk.transform.decode.SourceData;
+import org.apache.inlong.sdk.transform.process.Context;
+import org.apache.inlong.sdk.transform.process.operator.OperatorTools;
+import org.apache.inlong.sdk.transform.process.parser.ValueParser;
+
+import net.sf.jsqlparser.expression.Expression;
+import net.sf.jsqlparser.expression.Function;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * TranslateFunction
+ * translate(expression, find_chars, replace_chars)
+ * Description:
+ * For a given expression, replaces all occurrences of specified characters with specified substitutes.
+ * Existing characters are mapped to replacement characters by their positions in the find_chars and replace_chars arguments.
+ * If more characters are specified in the find_chars argument than in the replace_chars argument, the extra characters from the find_chars argument have no mapping and are left unchanged in the return value.
+ *
+ * Translate function is similar to the replace function and the regexp_replace function,
+ * except that replace substitutes one entire string with another string and regexp_replace lets you search a string for a regular expression pattern,
+ * while translate makes multiple single-character substitutions.
+ *
+ * Arguments:
+ *      expression: The expression to be translated; if it evaluates to null, an empty string is returned.
+ *      find_chars: A string containing the characters to be replaced; null is treated as empty, and for a repeated character the last mapped occurrence decides its replacement.
+ *      replace_chars: A string containing the characters to substitute; null is treated as empty.
+ * examples:
+ *      case1: translate(email, '@', '.') -> original_expression: harry@inlong.com  target_expression: harry.inlong.com
+ *      case2: translate(hello WorD, 'WD', 'wd') -> original_expression: hello WorD  target_expression: hello word
+ */
+public class TranslateFunction implements ValueParser {
+
+    private ValueParser originalStrParser;
+
+    private ValueParser findCharsParser;
+
+    private ValueParser replaceCharsParser;
+
+    public TranslateFunction(Function expr) {
+        List<Expression> expressions = expr.getParameters().getExpressions();
+        originalStrParser = OperatorTools.buildParser(expressions.get(0));
+        findCharsParser = OperatorTools.buildParser(expressions.get(1));
+        replaceCharsParser = OperatorTools.buildParser(expressions.get(2));
+    }
+
+    @Override
+    public Object parse(SourceData sourceData, int rowIndex, Context context) {
+        Object originalStrObject = originalStrParser.parse(sourceData, 
rowIndex, context);
+        Object findCharsObject = findCharsParser.parse(sourceData, rowIndex, 
context);
+        Object replaceCharsObject = replaceCharsParser.parse(sourceData, 
rowIndex, context);
+        String originalStr = OperatorTools.parseString(originalStrObject);
+        String findChars = OperatorTools.parseString(findCharsObject);
+        String replaceChars = OperatorTools.parseString(replaceCharsObject);
+
+        if (originalStr == null) {
+            return "";
+        }
+        StringBuilder builder = null;
+        final int findSize = findChars == null ? 0 : findChars.length();
+        final int replaceSize = replaceChars == null ? 0 : 
replaceChars.length();
+        final int commonSize = Math.min(findSize, replaceSize);
+        // Map each of the first commonSize characters of findChars to the character at the same index in replaceChars
+        Map<Character, Character> replacementMap = new HashMap<>();
+        for (int i = 0; i < commonSize; i++) {
+            char findChar = findChars.charAt(i);
+            char replaceChar = replaceChars.charAt(i);
+            replacementMap.put(findChar, replaceChar);
+        }
+        for (int i = 0, size = originalStr.length(); i < size; i++) {
+            char ch = originalStr.charAt(i);
+            if (replacementMap.containsKey(ch)) {
+                // The current character has a mapping: start the builder lazily if needed,
+                // and replace the current character with its mapped counterpart, the character 
at the same index in replaceChars.
+                if (builder == null) {
+                    builder = new StringBuilder(size);
+                    if (i > 0) {
+                        builder.append(originalStr, 0, i);
+                    }
+                }
+                ch = replacementMap.get(ch);
+            }
+            if (builder != null) {
+                builder.append(ch);
+            }
+        }
+        return builder == null ? originalStr : builder.toString(); // builder stays null when nothing was replaced
+    }
+}
diff --git 
a/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/operator/OperatorTools.java
 
b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/operator/OperatorTools.java
index fbd52185e0..e4bfb2cf62 100644
--- 
a/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/operator/OperatorTools.java
+++ 
b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/operator/OperatorTools.java
@@ -59,6 +59,7 @@ import 
org.apache.inlong.sdk.transform.process.function.TimestampExtractFunction
 import org.apache.inlong.sdk.transform.process.function.ToBase64Function;
 import org.apache.inlong.sdk.transform.process.function.ToDateFunction;
 import org.apache.inlong.sdk.transform.process.function.ToTimestampFunction;
+import org.apache.inlong.sdk.transform.process.function.TranslateFunction;
 import org.apache.inlong.sdk.transform.process.function.TrimFunction;
 import org.apache.inlong.sdk.transform.process.function.UnixTimestampFunction;
 import org.apache.inlong.sdk.transform.process.function.UpperFunction;
@@ -111,7 +112,7 @@ import java.util.Map;
 
 /**
  * OperatorTools
- * 
+ *
  */
 public class OperatorTools {
 
@@ -179,6 +180,7 @@ public class OperatorTools {
         functionMap.put("right", RightFunction::new);
         functionMap.put("timestampadd", TimestampAddFunction::new);
         functionMap.put("md5", Md5Function::new);
+        functionMap.put("translate", TranslateFunction::new);
     }
 
     public static ExpressionOperator buildOperator(Expression expr) {
diff --git 
a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/TestTransformStringFunctionsProcessor.java
 
b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/TestTransformStringFunctionsProcessor.java
index f28e9ac50e..2a47615958 100644
--- 
a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/TestTransformStringFunctionsProcessor.java
+++ 
b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/TestTransformStringFunctionsProcessor.java
@@ -442,4 +442,31 @@ public class TestTransformStringFunctionsProcessor {
         Assert.assertEquals(1, output1.size());
         Assert.assertEquals("result=null", output1.get(0));
     }
+
+    @Test
+    public void testTranslateFunction() throws Exception {
+        String transformSql1 = "select translate(string1, string2, string3) 
from source";
+        TransformConfig config1 = new TransformConfig(transformSql1);
+        TransformProcessor<String, String> processor1 = TransformProcessor
+                .create(config1, 
SourceDecoderFactory.createCsvDecoder(csvSource),
+                        SinkEncoderFactory.createKvEncoder(kvSink));
+        // case1: arguments in column order -> translate("hello word!", "el", "EL")
+        List<String> output1 = processor1.transform("hello word!|el|EL|2|1|3", 
new HashMap<>());
+        Assert.assertEquals(1, output1.size());
+        Assert.assertEquals("result=hELLo word!", output1.get(0));
+        String transformSql2 = "select translate(string3, string1, string2) 
from source";
+        TransformConfig config2 = new TransformConfig(transformSql2);
+        TransformProcessor<String, String> processor2 = TransformProcessor
+                .create(config2, 
SourceDecoderFactory.createCsvDecoder(csvSource),
+                        SinkEncoderFactory.createKvEncoder(kvSink));
+        // case2: same call as case1 with the arguments read from reordered columns -> translate("hello word!", "el", "EL")
+        List<String> output2 = processor2.transform("el|EL|hello word!|1|1|3", 
new HashMap<>());
+        Assert.assertEquals(1, output2.size());
+        Assert.assertEquals("result=hELLo word!", output2.get(0));
+        // case3: translate("ApaCHe Inlong", "CH", "ch")
+        List<String> output3 = processor2.transform("CH|ch|ApaCHe 
Inlong|2|1|9", new HashMap<>());
+        Assert.assertEquals(1, output3.size());
+        Assert.assertEquals("result=Apache Inlong", output3.get(0));
+    }
+
 }

Reply via email to