This is an automated email from the ASF dual-hosted git repository. aloyszhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/inlong.git
The following commit(s) were added to refs/heads/master by this push: new 04971c37e9 [INLONG-10832][SDK] Transform SQL support Translate function (#10863) 04971c37e9 is described below commit 04971c37e908771526b2ac7c888cc55badfeff27 Author: Huan Liang <coderliangh...@gmail.com> AuthorDate: Wed Aug 28 16:03:07 2024 +0800 [INLONG-10832][SDK] Transform SQL support Translate function (#10863) Co-authored-by: AloysZhang <aloyszh...@apache.org> --- .../process/function/TranslateFunction.java | 109 +++++++++++++++++++++ .../transform/process/operator/OperatorTools.java | 4 +- .../TestTransformStringFunctionsProcessor.java | 27 +++++ 3 files changed, 139 insertions(+), 1 deletion(-) diff --git a/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/TranslateFunction.java b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/TranslateFunction.java new file mode 100644 index 0000000000..ce05eb0fc3 --- /dev/null +++ b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/TranslateFunction.java @@ -0,0 +1,109 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.inlong.sdk.transform.process.function; + +import org.apache.inlong.sdk.transform.decode.SourceData; +import org.apache.inlong.sdk.transform.process.Context; +import org.apache.inlong.sdk.transform.process.operator.OperatorTools; +import org.apache.inlong.sdk.transform.process.parser.ValueParser; + +import net.sf.jsqlparser.expression.Expression; +import net.sf.jsqlparser.expression.Function; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * TranslateFunction + * translate(expression, find_chars, replace_chars) + * Description: + * For a given expression, replaces all occurrences of specified characters with specified substitutes. + * Existing characters are mapped to replacement characters by their positions in the find_chars and replace_chars arguments. + * If more characters are specified in the find_chars argument than in the replace_chars argument, the extra characters from the find_chars argument have no replacement and are left unchanged in the return value; extra characters in replace_chars are ignored (NOTE(review): several SQL dialects remove the unmatched find_chars instead - confirm which behavior is intended). + * + * Translate function is similar to the replace function and the regexp_replace function, + * except that replace substitutes one entire string with another string and regexp_replace lets you search a string for a regular expression pattern, + * while translate makes multiple single-character substitutions. + * + * Arguments: + * expression: The expression to be translated; when it parses to null, an empty string is returned. + * find_chars: A string containing the characters to be replaced; null is treated as an empty string. + * replace_chars: A string containing the characters to substitute; null is treated as an empty string. 
+ * examples: + * case1: translate(email, '@', '.') -> original_expression: harry@inlong.com target_expression: harry.inlong.com + * case2: translate(hello WorD, 'WD', 'wd') -> original_expression: hello WorD target_expression: hello word + */ +public class TranslateFunction implements ValueParser { + + private ValueParser originalStrParser; + + private ValueParser findCharsParser; + + private ValueParser replaceCharsParser; + + public TranslateFunction(Function expr) { + List<Expression> expressions = expr.getParameters().getExpressions(); + originalStrParser = OperatorTools.buildParser(expressions.get(0)); + findCharsParser = OperatorTools.buildParser(expressions.get(1)); + replaceCharsParser = OperatorTools.buildParser(expressions.get(2)); + } + + @Override + public Object parse(SourceData sourceData, int rowIndex, Context context) { + Object originalStrObject = originalStrParser.parse(sourceData, rowIndex, context); + Object findCharsObject = findCharsParser.parse(sourceData, rowIndex, context); + Object replaceCharsObject = replaceCharsParser.parse(sourceData, rowIndex, context); + String originalStr = OperatorTools.parseString(originalStrObject); + String findChars = OperatorTools.parseString(findCharsObject); + String replaceChars = OperatorTools.parseString(replaceCharsObject); + + if (originalStr == null) { + return ""; + } + StringBuilder builder = null; + final int findSize = findChars == null ? 0 : findChars.length(); + final int replaceSize = replaceChars == null ? 
0 : replaceChars.length(); + final int commonSize = Math.min(findSize, replaceSize); + // Create a map to store character replacements; only the first commonSize positions are paired, and a repeated find char keeps its last pairing + Map<Character, Character> replacementMap = new HashMap<>(); + for (int i = 0; i < commonSize; i++) { + char findChar = findChars.charAt(i); + char replaceChar = replaceChars.charAt(i); + replacementMap.put(findChar, replaceChar); + } + for (int i = 0, size = originalStr.length(); i < size; i++) { + char ch = originalStr.charAt(i); + if (replacementMap.containsKey(ch)) { + // ch is one of the mapped find chars; it is swapped below for the replace char paired with it by position. + // The builder is created lazily on the first such hit (copying the untouched prefix), so an input without hits is returned as-is. + if (builder == null) { + builder = new StringBuilder(size); + if (i > 0) { + builder.append(originalStr, 0, i); + } + } + ch = replacementMap.get(ch); + } + if (builder != null) { + builder.append(ch); + } + } + return builder == null ? originalStr : builder.toString(); + } +} diff --git a/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/operator/OperatorTools.java b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/operator/OperatorTools.java index fbd52185e0..e4bfb2cf62 100644 --- a/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/operator/OperatorTools.java +++ b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/operator/OperatorTools.java @@ -59,6 +59,7 @@ import org.apache.inlong.sdk.transform.process.function.TimestampExtractFunction import org.apache.inlong.sdk.transform.process.function.ToBase64Function; import org.apache.inlong.sdk.transform.process.function.ToDateFunction; import org.apache.inlong.sdk.transform.process.function.ToTimestampFunction; +import org.apache.inlong.sdk.transform.process.function.TranslateFunction; import org.apache.inlong.sdk.transform.process.function.TrimFunction; import org.apache.inlong.sdk.transform.process.function.UnixTimestampFunction; import 
org.apache.inlong.sdk.transform.process.function.UpperFunction; @@ -111,7 +112,7 @@ import java.util.Map; /** * OperatorTools - * + * */ public class OperatorTools { @@ -179,6 +180,7 @@ public class OperatorTools { functionMap.put("right", RightFunction::new); functionMap.put("timestampadd", TimestampAddFunction::new); functionMap.put("md5", Md5Function::new); + functionMap.put("translate", TranslateFunction::new); } public static ExpressionOperator buildOperator(Expression expr) { diff --git a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/TestTransformStringFunctionsProcessor.java b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/TestTransformStringFunctionsProcessor.java index f28e9ac50e..2a47615958 100644 --- a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/TestTransformStringFunctionsProcessor.java +++ b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/TestTransformStringFunctionsProcessor.java @@ -442,4 +442,31 @@ public class TestTransformStringFunctionsProcessor { Assert.assertEquals(1, output1.size()); Assert.assertEquals("result=null", output1.get(0)); } + + @Test + public void testTranslateFunction() throws Exception { + String transformSql1 = "select translate(string1, string2, string3) from source"; + TransformConfig config1 = new TransformConfig(transformSql1); + TransformProcessor<String, String> processor1 = TransformProcessor + .create(config1, SourceDecoderFactory.createCsvDecoder(csvSource), + SinkEncoderFactory.createKvEncoder(kvSink)); + // case1: translate("hello word!", "el", "EL"), arguments read from string1, string2, string3 in that order. NOTE(review): the result assertions in this test pass (actual, expected), unlike the preceding test which passes the expected value first - consider swapping + List<String> output1 = processor1.transform("hello word!|el|EL|2|1|3", new HashMap<>()); + Assert.assertEquals(1, output1.size()); + Assert.assertEquals(output1.get(0), "result=hELLo word!"); + String transformSql2 = "select translate(string3, string1, string2) from source"; + TransformConfig config2 = new TransformConfig(transformSql2); + 
TransformProcessor<String, String> processor2 = TransformProcessor + .create(config2, SourceDecoderFactory.createCsvDecoder(csvSource), + SinkEncoderFactory.createKvEncoder(kvSink)); + // case2: same translate("hello word!", "el", "EL") call as case1, but with the expression read from string3 and find/replace chars from string1/string2 + List<String> output2 = processor2.transform("el|EL|hello word!|1|1|3", new HashMap<>()); + Assert.assertEquals(1, output2.size()); + Assert.assertEquals(output2.get(0), "result=hELLo word!"); + // case3: translate("ApaCHe Inlong", "CH", "ch"), same column order as case2 + List<String> output3 = processor2.transform("CH|ch|ApaCHe Inlong|2|1|9", new HashMap<>()); + Assert.assertEquals(1, output3.size()); + Assert.assertEquals(output3.get(0), "result=Apache Inlong"); + } + }