This is an automated email from the ASF dual-hosted git repository.

luchunliang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/inlong.git


The following commit(s) were added to refs/heads/master by this push:
     new 751f69dad8 [INLONG-11037][SDK] Transform support ENCODE() and DECODE() function (#11041)
751f69dad8 is described below

commit 751f69dad806dff68ce796399c5e7f0c7dbfdb95
Author: emptyOVO <118812562+empty...@users.noreply.github.com>
AuthorDate: Wed Sep 11 14:13:37 2024 +0800

    [INLONG-11037][SDK] Transform support ENCODE() and DECODE() function (#11041)
    
    * [INLONG-11037][SDK] Transform support ENCODE() and DECODE() function
    
    * fix: add NP check
    
    * fix: clear definition of the specific encoding type
    
    * fix: add description
---
 .../transform/process/function/DecodeFunction.java | 95 +++++++++++++++++++++
 .../transform/process/function/EncodeFunction.java | 97 ++++++++++++++++++++++
 .../function/string/TestDecodeFunction.java        | 80 ++++++++++++++++++
 .../function/string/TestEncodeFunction.java        | 76 +++++++++++++++++
 4 files changed, 348 insertions(+)

diff --git a/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/DecodeFunction.java b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/DecodeFunction.java
new file mode 100644
index 0000000000..042e6b4f98
--- /dev/null
+++ b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/DecodeFunction.java
@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.sdk.transform.process.function;
+
+import org.apache.inlong.sdk.transform.decode.SourceData;
+import org.apache.inlong.sdk.transform.process.Context;
+import org.apache.inlong.sdk.transform.process.operator.OperatorTools;
+import org.apache.inlong.sdk.transform.process.parser.ValueParser;
+
+import net.sf.jsqlparser.expression.Expression;
+import net.sf.jsqlparser.expression.Function;
+
+import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+/**
+ * DecodeFunction
+ * description: decode(binary, string)
+ *      Decodes {@code binary}, written as whitespace-separated decimal byte values
+ *      (for example '72 101 108 108 111'), into a string using the supplied character set
+ *      ('US-ASCII', 'ISO-8859-1', 'UTF-8', 'UTF-16BE', 'UTF-16LE', 'UTF-16').
+ *      The character set name is matched case-insensitively.
+ *      Returns null if either parameter is null. Returns an empty string if either parameter is empty,
+ *      the character set is not one of the supported ones, or the byte list is malformed
+ *      (a token is not an integer in the range -128..255).
+ */
+@TransformFunction(names = {"decode"})
+public class DecodeFunction implements ValueParser {
+
+    /** Canonical (upper-case) names of the only character sets this function accepts. */
+    private static final Set<String> SUPPORTED_CHARSETS;
+
+    static {
+        Set<String> charsets = new HashSet<>();
+        charsets.add(StandardCharsets.US_ASCII.name());
+        charsets.add(StandardCharsets.ISO_8859_1.name());
+        charsets.add(StandardCharsets.UTF_8.name());
+        charsets.add(StandardCharsets.UTF_16.name());
+        charsets.add(StandardCharsets.UTF_16BE.name());
+        charsets.add(StandardCharsets.UTF_16LE.name());
+        SUPPORTED_CHARSETS = Collections.unmodifiableSet(charsets);
+    }
+
+    /** Parser for the first argument (the byte list); null when the call did not have exactly two arguments. */
+    private final ValueParser binaryParser;
+
+    /** Parser for the second argument (the charset name); null when the call did not have exactly two arguments. */
+    private final ValueParser characterSetParser;
+
+    /**
+     * Builds the argument parsers from the SQL function expression.
+     *
+     * @param expr the parsed {@code decode(...)} call; parsers are only created when it has exactly two arguments
+     */
+    public DecodeFunction(Function expr) {
+        // Guard against a missing parameter list so construction never fails with a NullPointerException.
+        List<Expression> expressions = expr.getParameters() == null
+                ? null
+                : expr.getParameters().getExpressions();
+        boolean hasTwoArgs = expressions != null && expressions.size() == 2;
+        binaryParser = hasTwoArgs ? OperatorTools.buildParser(expressions.get(0)) : null;
+        characterSetParser = hasTwoArgs ? OperatorTools.buildParser(expressions.get(1)) : null;
+    }
+
+    /**
+     * Evaluates both arguments for the given row and decodes the byte list.
+     *
+     * @return the decoded string, an empty string for empty/unsupported/malformed input,
+     *         or null when an argument evaluates to null or the call did not have exactly two arguments
+     */
+    @Override
+    public Object parse(SourceData sourceData, int rowIndex, Context context) {
+        if (binaryParser == null || characterSetParser == null) {
+            return null;
+        }
+        Object binaryObj = binaryParser.parse(sourceData, rowIndex, context);
+        Object characterObj = characterSetParser.parse(sourceData, rowIndex, context);
+        if (binaryObj == null || characterObj == null) {
+            return null;
+        }
+        String binaryString = OperatorTools.parseString(binaryObj);
+        String characterSetValue = OperatorTools.parseString(characterObj);
+        // Locale.ROOT keeps the upper-casing independent of the JVM default locale
+        // (e.g. a Turkish locale would turn "iso-8859-1" into "\u0130SO-8859-1").
+        String charsetName = characterSetValue == null
+                ? null
+                : characterSetValue.trim().toUpperCase(java.util.Locale.ROOT);
+        return decode(binaryString, charsetName);
+    }
+
+    /**
+     * Converts a whitespace-separated list of decimal byte values into a string.
+     *
+     * @param binaryString byte values such as "72 101 108"; signed (-128..127) and unsigned (0..255) forms are accepted
+     * @param charsetName  upper-cased charset name
+     * @return the decoded text, or "" when the input is empty, the charset is not supported or a token is invalid
+     */
+    private String decode(String binaryString, String charsetName) {
+        if (binaryString == null || charsetName == null) {
+            return "";
+        }
+        // Check the whitelist before touching java.nio.charset.Charset: Charset.isSupported/forName throw
+        // IllegalCharsetNameException for syntactically illegal names, whereas a plain set lookup cannot throw.
+        if (!SUPPORTED_CHARSETS.contains(charsetName)) {
+            return "";
+        }
+        String trimmed = binaryString.trim();
+        if (trimmed.isEmpty()) {
+            return "";
+        }
+        // "\\s+" tolerates repeated separators, which split(" ") would turn into empty, unparsable tokens.
+        String[] byteValues = trimmed.split("\\s+");
+        byte[] byteArray = new byte[byteValues.length];
+        try {
+            for (int i = 0; i < byteValues.length; i++) {
+                int value = Integer.parseInt(byteValues[i]);
+                if (value < Byte.MIN_VALUE || value > 0xFF) {
+                    // Not representable as a single byte; reject instead of silently truncating.
+                    return "";
+                }
+                byteArray[i] = (byte) value;
+            }
+        } catch (NumberFormatException e) {
+            // A non-numeric token makes the whole byte list invalid.
+            return "";
+        }
+        return new String(byteArray, Charset.forName(charsetName));
+    }
+}
diff --git a/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/EncodeFunction.java b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/EncodeFunction.java
new file mode 100644
index 0000000000..8196c529fc
--- /dev/null
+++ b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/EncodeFunction.java
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.sdk.transform.process.function;
+
+import org.apache.inlong.sdk.transform.decode.SourceData;
+import org.apache.inlong.sdk.transform.process.Context;
+import org.apache.inlong.sdk.transform.process.operator.OperatorTools;
+import org.apache.inlong.sdk.transform.process.parser.ValueParser;
+
+import net.sf.jsqlparser.expression.Expression;
+import net.sf.jsqlparser.expression.Function;
+
+import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+/**
+ * EncodeFunction
+ * description: encode(string1, string2)
+ *      Encodes {@code string1} using the provided character set
+ *      ('US-ASCII', 'ISO-8859-1', 'UTF-8', 'UTF-16BE', 'UTF-16LE', 'UTF-16') and returns the bytes
+ *      as space-separated signed decimal values (for example 'Hello' in UTF-8 gives '72 101 108 108 111').
+ *      The character set name is matched case-insensitively.
+ *      Returns null if either parameter is null. Returns an empty string if either parameter is empty
+ *      or the character set is not one of the supported ones.
+ */
+@TransformFunction(names = {"encode"})
+public class EncodeFunction implements ValueParser {
+
+    /** Canonical (upper-case) names of the only character sets this function accepts. */
+    private static final Set<String> SUPPORTED_CHARSETS;
+
+    static {
+        Set<String> charsets = new HashSet<>();
+        charsets.add(StandardCharsets.US_ASCII.name());
+        charsets.add(StandardCharsets.ISO_8859_1.name());
+        charsets.add(StandardCharsets.UTF_8.name());
+        charsets.add(StandardCharsets.UTF_16.name());
+        charsets.add(StandardCharsets.UTF_16BE.name());
+        charsets.add(StandardCharsets.UTF_16LE.name());
+        SUPPORTED_CHARSETS = Collections.unmodifiableSet(charsets);
+    }
+
+    /** Parser for the first argument (the text to encode); null when the call did not have exactly two arguments. */
+    private final ValueParser stringParser;
+
+    /** Parser for the second argument (the charset name); null when the call did not have exactly two arguments. */
+    private final ValueParser characterSetParser;
+
+    /**
+     * Builds the argument parsers from the SQL function expression.
+     *
+     * @param expr the parsed {@code encode(...)} call; parsers are only created when it has exactly two arguments
+     */
+    public EncodeFunction(Function expr) {
+        // Guard against a missing parameter list so construction never fails with a NullPointerException.
+        List<Expression> expressions = expr.getParameters() == null
+                ? null
+                : expr.getParameters().getExpressions();
+        boolean hasTwoArgs = expressions != null && expressions.size() == 2;
+        stringParser = hasTwoArgs ? OperatorTools.buildParser(expressions.get(0)) : null;
+        characterSetParser = hasTwoArgs ? OperatorTools.buildParser(expressions.get(1)) : null;
+    }
+
+    /**
+     * Evaluates both arguments for the given row and renders the encoded bytes as text.
+     *
+     * @return the space-separated byte values, an empty string for empty/unsupported input,
+     *         or null when an argument evaluates to null or the call did not have exactly two arguments
+     */
+    @Override
+    public Object parse(SourceData sourceData, int rowIndex, Context context) {
+        if (stringParser == null || characterSetParser == null) {
+            return null;
+        }
+        Object stringObj = stringParser.parse(sourceData, rowIndex, context);
+        Object characterObj = characterSetParser.parse(sourceData, rowIndex, context);
+        if (stringObj == null || characterObj == null) {
+            return null;
+        }
+        String stringValue = OperatorTools.parseString(stringObj);
+        String characterSetValue = OperatorTools.parseString(characterObj);
+        // Locale.ROOT keeps the upper-casing independent of the JVM default locale
+        // (e.g. a Turkish locale would turn "iso-8859-1" into "\u0130SO-8859-1").
+        String charsetName = characterSetValue == null
+                ? null
+                : characterSetValue.trim().toUpperCase(java.util.Locale.ROOT);
+        byte[] encodeBytes = encode(stringValue, charsetName);
+        StringBuilder res = new StringBuilder();
+        for (byte encodeByte : encodeBytes) {
+            // Separator goes before every value except the first, so no trailing space has to be trimmed.
+            if (res.length() > 0) {
+                res.append(' ');
+            }
+            res.append((int) encodeByte);
+        }
+        return res.toString();
+    }
+
+    /**
+     * Encodes the text with one of the supported character sets.
+     *
+     * @param stringValue       text to encode
+     * @param characterSetValue upper-cased charset name
+     * @return the encoded bytes; never null, empty when the input is empty or the charset is not supported
+     */
+    private byte[] encode(String stringValue, String characterSetValue) {
+        if (stringValue == null || stringValue.isEmpty() || characterSetValue == null) {
+            return new byte[0];
+        }
+        // Check the whitelist before touching java.nio.charset.Charset: Charset.isSupported/forName throw
+        // IllegalCharsetNameException for syntactically illegal names, whereas a plain set lookup cannot throw.
+        if (!SUPPORTED_CHARSETS.contains(characterSetValue)) {
+            return new byte[0];
+        }
+        return stringValue.getBytes(Charset.forName(characterSetValue));
+    }
+}
diff --git a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestDecodeFunction.java b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestDecodeFunction.java
new file mode 100644
index 0000000000..4368334b79
--- /dev/null
+++ b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestDecodeFunction.java
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.sdk.transform.process.function.string;
+
+import org.apache.inlong.sdk.transform.decode.SourceDecoderFactory;
+import org.apache.inlong.sdk.transform.encode.SinkEncoderFactory;
+import org.apache.inlong.sdk.transform.pojo.TransformConfig;
+import org.apache.inlong.sdk.transform.process.TransformProcessor;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.HashMap;
+import java.util.List;
+
+/**
+ * Tests for the decode(binary, charset) transform function.
+ * Each input row is '|'-separated; string1 and string2 are presumably its first two fields,
+ * as configured by the base class (not visible here).
+ */
+public class TestDecodeFunction extends AbstractFunctionStringTestBase {
+
+    @Test
+    public void testDecodeFunction() throws Exception {
+        String transformSql = "select decode(string1,string2) from source";
+        TransformConfig config = new TransformConfig(transformSql);
+        TransformProcessor<String, String> processor = TransformProcessor
+                .create(config, SourceDecoderFactory.createCsvDecoder(csvSource),
+                        SinkEncoderFactory.createKvEncoder(kvSink));
+
+        // case1: decode('72 101 108 108 111','UTF-8')
+        assertSingleResult(processor, "72 101 108 108 111|UTF-8|banana|cloud|1", "result=Hello");
+
+        // case2: decode('72 101 108 108 111','US-ASCII')
+        assertSingleResult(processor, "72 101 108 108 111|US-ASCII|banana|cloud|1", "result=Hello");
+
+        // case3: decode('72 101 108 108 111','ISO-8859-1')
+        assertSingleResult(processor, "72 101 108 108 111|ISO-8859-1|banana|cloud|1", "result=Hello");
+
+        // case4: decode('0 72 0 101 0 108 0 108 0 111','UTF-16BE')
+        assertSingleResult(processor, "0 72 0 101 0 108 0 108 0 111|UTF-16BE|banana|cloud|1", "result=Hello");
+
+        // case5: decode('72 0 101 0 108 0 108 0 111 0','UTF-16LE'), mixed-case charset name
+        assertSingleResult(processor, "72 0 101 0 108 0 108 0 111 0|UTf-16LE|banana|cloud|1", "result=Hello");
+
+        // case6: decode('-2 -1 0 72 0 101 0 108 0 108 0 111','UTF-16'), mixed-case charset name
+        assertSingleResult(processor, "-2 -1 0 72 0 101 0 108 0 108 0 111|UtF-16|banana|cloud|1", "result=Hello");
+
+        // case7: decode('-2 -1 0 72 0 101 0 108 0 108 0 111','UTF-16--'), unsupported charset gives empty result
+        assertSingleResult(processor, "-2 -1 0 72 0 101 0 108 0 108 0 111|UTF-16--|banana|cloud|1", "result=");
+    }
+
+    /**
+     * Transforms one input row and checks that exactly one output row with the expected content is produced.
+     */
+    private static void assertSingleResult(TransformProcessor<String, String> processor, String input,
+            String expected) throws Exception {
+        List<String> output = processor.transform(input, new HashMap<>());
+        Assert.assertEquals(1, output.size());
+        // JUnit convention: expected value first, so failure messages read correctly.
+        Assert.assertEquals(expected, output.get(0));
+    }
+}
diff --git a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestEncodeFunction.java b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestEncodeFunction.java
new file mode 100644
index 0000000000..73ff2f4876
--- /dev/null
+++ b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestEncodeFunction.java
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.sdk.transform.process.function.string;
+
+import org.apache.inlong.sdk.transform.decode.SourceDecoderFactory;
+import org.apache.inlong.sdk.transform.encode.SinkEncoderFactory;
+import org.apache.inlong.sdk.transform.pojo.TransformConfig;
+import org.apache.inlong.sdk.transform.process.TransformProcessor;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.HashMap;
+import java.util.List;
+
+/**
+ * Tests for the encode(string, charset) transform function.
+ * Each input row is '|'-separated; string1 and string2 are presumably its first two fields,
+ * as configured by the base class (not visible here).
+ */
+public class TestEncodeFunction extends AbstractFunctionStringTestBase {
+
+    @Test
+    public void testEncodeFunction() throws Exception {
+        String transformSql = "select encode(string1,string2) from source";
+        TransformConfig config = new TransformConfig(transformSql);
+        TransformProcessor<String, String> processor = TransformProcessor
+                .create(config, SourceDecoderFactory.createCsvDecoder(csvSource),
+                        SinkEncoderFactory.createKvEncoder(kvSink));
+
+        // case1: encode('Hello','UTF-8')
+        assertSingleResult(processor, "Hello|UTF-8|banana|cloud|1", "result=72 101 108 108 111");
+
+        // case2: encode('Hello','US-ASCII')
+        assertSingleResult(processor, "Hello|US-ASCII|banana|cloud|1", "result=72 101 108 108 111");
+
+        // case3: encode('Hello','ISO-8859-1')
+        assertSingleResult(processor, "Hello|ISO-8859-1|banana|cloud|1", "result=72 101 108 108 111");
+
+        // case4: encode('Hello','UTF-16BE')
+        assertSingleResult(processor, "Hello|UTF-16BE|banana|cloud|1", "result=0 72 0 101 0 108 0 108 0 111");
+
+        // case5: encode('Hello','UTF-16LE'), mixed-case charset name
+        assertSingleResult(processor, "Hello|UTf-16LE|banana|cloud|1", "result=72 0 101 0 108 0 108 0 111 0");
+
+        // case6: encode('Hello','UTF-16'), mixed-case charset name; output starts with the byte-order mark
+        assertSingleResult(processor, "Hello|UtF-16|banana|cloud|1",
+                "result=-2 -1 0 72 0 101 0 108 0 108 0 111");
+
+        // case7: encode('Hello','UTF-16--'), unsupported charset gives empty result
+        assertSingleResult(processor, "Hello|UTF-16--|banana|cloud|1", "result=");
+    }
+
+    /**
+     * Transforms one input row and checks that exactly one output row with the expected content is produced.
+     */
+    private static void assertSingleResult(TransformProcessor<String, String> processor, String input,
+            String expected) throws Exception {
+        List<String> output = processor.transform(input, new HashMap<>());
+        Assert.assertEquals(1, output.size());
+        // JUnit convention: expected value first, so failure messages read correctly.
+        Assert.assertEquals(expected, output.get(0));
+    }
+}

Reply via email to