This is an automated email from the ASF dual-hosted git repository. aloyszhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/inlong.git
The following commit(s) were added to refs/heads/master by this push: new 9ec6785c63 [INLONG-10900][SDK] Transform SQL support soundex function (#11061) 9ec6785c63 is described below commit 9ec6785c631ef7d6160aa7ee8f54e603ae279361 Author: Huan Liang <lf9283...@gmail.com> AuthorDate: Tue Sep 10 10:33:17 2024 +0800 [INLONG-10900][SDK] Transform SQL support soundex function (#11061) --- .../process/function/SoundexFunction.java | 79 ++++++++++++++++++++++ .../function/string/TestSoundexFunction.java | 49 ++++++++++++++ 2 files changed, 128 insertions(+) diff --git a/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/SoundexFunction.java b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/SoundexFunction.java new file mode 100644 index 0000000000..2a46c2a8a6 --- /dev/null +++ b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/SoundexFunction.java @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.inlong.sdk.transform.process.function; + +import org.apache.inlong.sdk.transform.decode.SourceData; +import org.apache.inlong.sdk.transform.process.Context; +import org.apache.inlong.sdk.transform.process.operator.OperatorTools; +import org.apache.inlong.sdk.transform.process.parser.ValueParser; + +import net.sf.jsqlparser.expression.Function; + +import java.nio.charset.StandardCharsets; + +/** + * SoundexFunction + * description: soundex(string)---Returns a four character code representing the sound of a string. This method returns a string, or null if parameter is null. + * See https://en.wikipedia.org/wiki/Soundex for more information. + */ +@TransformFunction(names = {"soundex"}) +public class SoundexFunction implements ValueParser { + + private ValueParser stringParser; + + private static final byte[] SOUNDEX_INDEX = + "71237128722455712623718272\000\000\000\000\000\00071237128722455712623718272" + .getBytes(StandardCharsets.ISO_8859_1); + + public SoundexFunction(Function expr) { + stringParser = OperatorTools.buildParser(expr.getParameters().getExpressions().get(0)); + } + @Override + public Object parse(SourceData sourceData, int rowIndex, Context context) { + Object stringObject = stringParser.parse(sourceData, rowIndex, context); + String str = OperatorTools.parseString(stringObject); + if (str == null) { + return null; + } + return new String(getSoundex(str), StandardCharsets.ISO_8859_1); + } + + private static byte[] getSoundex(String str) { + byte[] chars = {'0', '0', '0', '0'}; + byte lastDigit = '0'; + for (int i = 0, j = 0, l = str.length(); i < l && j < 4; i++) { + char c = str.charAt(i); + if (c >= 'A' && c <= 'z') { + byte newDigit = SOUNDEX_INDEX[c - 'A']; + if (newDigit != 0) { + if (j == 0) { + chars[j++] = (byte) (c & 0xdf); // Converts a-z to A-Z + lastDigit = newDigit; + } else if (newDigit <= '6') { + if (newDigit != lastDigit) { + chars[j++] = lastDigit = newDigit; + } + } else if (newDigit == '7') { + lastDigit = newDigit; + } + } + } + } + return chars; + } +} diff --git a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestSoundexFunction.java b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestSoundexFunction.java new file mode 100644 index 0000000000..8b673ab06f --- /dev/null +++ b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestSoundexFunction.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.inlong.sdk.transform.process.function.string; + +import org.apache.inlong.sdk.transform.decode.SourceDecoderFactory; +import org.apache.inlong.sdk.transform.encode.SinkEncoderFactory; +import org.apache.inlong.sdk.transform.pojo.TransformConfig; +import org.apache.inlong.sdk.transform.process.TransformProcessor; + +import org.junit.Assert; +import org.junit.Test; + +import java.util.HashMap; +import java.util.List; + +public class TestSoundexFunction extends AbstractFunctionStringTestBase { + + @Test + public void testSoundexFunction() throws Exception { + String transformSql = "select soundex(string1) from source"; + TransformConfig config = new TransformConfig(transformSql); + TransformProcessor<String, String> processor1 = TransformProcessor + .create(config, SourceDecoderFactory.createCsvDecoder(csvSource), + SinkEncoderFactory.createKvEncoder(kvSink)); + // soundex('soundex function') + List<String> output1 = processor1.transform("soundex function", new HashMap<>()); + Assert.assertEquals(1, output1.size()); + Assert.assertEquals(output1.get(0), "result=S532"); + // soundex('hello world') + List<String> output2 = processor1.transform("hello world", new HashMap<>()); + Assert.assertEquals(1, output2.size()); + Assert.assertEquals(output2.get(0), "result=H464"); + } +}