This is an automated email from the ASF dual-hosted git repository. dockerzhang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/inlong.git
The following commit(s) were added to refs/heads/master by this push: new 9021b3c475 [INLONG-11217][SDK] Transform support JSON_QUOTE() and JSON_UNQUOTE() function (#11244) 9021b3c475 is described below commit 9021b3c4758ba90ea3a37b6e354cf05b918b87df Author: emptyOVO <118812562+empty...@users.noreply.github.com> AuthorDate: Tue Oct 8 12:58:26 2024 +0800 [INLONG-11217][SDK] Transform support JSON_QUOTE() and JSON_UNQUOTE() function (#11244) --- .../process/function/JsonQuoteFunction.java | 62 ++++++++ .../process/function/JsonUnQuoteFunction.java | 66 ++++++++ .../function/string/TestJsonQuoteFunction.java | 176 +++++++++++++++++++++ .../function/string/TestJsonUnQuoteFunction.java | 98 ++++++++++++ 4 files changed, 402 insertions(+) diff --git a/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/JsonQuoteFunction.java b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/JsonQuoteFunction.java new file mode 100644 index 0000000000..c0f022f9f2 --- /dev/null +++ b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/JsonQuoteFunction.java @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.inlong.sdk.transform.process.function; + +import org.apache.inlong.sdk.transform.decode.SourceData; +import org.apache.inlong.sdk.transform.process.Context; +import org.apache.inlong.sdk.transform.process.operator.OperatorTools; +import org.apache.inlong.sdk.transform.process.parser.ValueParser; + +import com.alibaba.fastjson.JSON; +import net.sf.jsqlparser.expression.Function; +/** + * JsonQuoteFunction + * description: JSON_QUOTE(string)--Quotes a string as a JSON value by wrapping it with double quote characters, + * escaping interior quote and special characters (’"’, ‘', ‘/’, ‘b’, ‘f’, ’n’, ‘r’, ’t’), and returning + * the result as a string. If the argument is NULL, the function returns NULL. + * + * JSON_STRING(string)--Serializes a value into JSON. returns a JSON string containing the serialized value. + * If the value is NULL, the function returns NULL. + * + * for example: json_quote('Hello, World!')--return "Hello, World!" + * json_quote('Complex string with / and \\')--return "Complex string with / and \\" + * + * json_string(1)--return 1 + * json_string(true)--return "true" + */ +@TransformFunction(names = {"json_quote", "json_string"}) +public class JsonQuoteFunction implements ValueParser { + + private ValueParser jsonParser; + + public JsonQuoteFunction(Function expr) { + this.jsonParser = OperatorTools.buildParser(expr.getParameters().getExpressions().get(0)); + } + + @Override + public Object parse(SourceData sourceData, int rowIndex, Context context) { + if (jsonParser == null) { + return null; + } + Object parse = jsonParser.parse(sourceData, rowIndex, context); + if (parse == null) { + return null; + } + return JSON.toJSONString(parse); + } +} diff --git a/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/JsonUnQuoteFunction.java b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/JsonUnQuoteFunction.java new file mode 100644 index 0000000000..2c7d60069e --- /dev/null +++ b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/JsonUnQuoteFunction.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.inlong.sdk.transform.process.function; + +import org.apache.inlong.sdk.transform.decode.SourceData; +import org.apache.inlong.sdk.transform.process.Context; +import org.apache.inlong.sdk.transform.process.operator.OperatorTools; +import org.apache.inlong.sdk.transform.process.parser.ValueParser; + +import com.alibaba.fastjson.JSON; +import com.alibaba.fastjson.JSONException; +import net.sf.jsqlparser.expression.Function; +/** + * JsonUnQuoteFunction + * description: JSON_UNQUOTE(string)--Unquotes JSON value, unescapes escaped special characters (’"’, ‘', ‘/’, ‘b’, + * ‘f’, ’n’, ‘r’, ’t’, ‘u’ hex hex hex hex), and returns the result as a string. If the argument is NULL, + * returns NULL. If the value does not start and end with double quotes or if it starts and ends with double + * quotes but is not a valid JSON string literal, the value is passed through unmodified. + * for example: json_unquote('Hello, World!')--return "Hello, World!" + * json_unquote('Complex string with / and \\')--return "Complex string with / and \\" + */ +@TransformFunction(names = {"json_unquote"}) +public class JsonUnQuoteFunction implements ValueParser { + + private ValueParser jsonParser; + + public JsonUnQuoteFunction(Function expr) { + this.jsonParser = OperatorTools.buildParser(expr.getParameters().getExpressions().get(0)); + } + + @Override + public Object parse(SourceData sourceData, int rowIndex, Context context) { + if (jsonParser == null) { + return null; + } + String jsonString = OperatorTools.parseString(jsonParser.parse(sourceData, rowIndex, context)); + if (jsonString == null) { + return null; + } + if (jsonString.length() < 2 || jsonString.charAt(0) != '"' + || jsonString.charAt(jsonString.length() - 1) != '"') { + return jsonString; + } + try { + return JSON.parseObject(jsonString, String.class); + } catch (JSONException e) { + return jsonString; + } + } + +} diff --git a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestJsonQuoteFunction.java b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestJsonQuoteFunction.java new file mode 100644 index 0000000000..ee5f2c948a --- /dev/null +++ b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestJsonQuoteFunction.java @@ -0,0 +1,176 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.inlong.sdk.transform.process.function.string; + +import org.apache.inlong.sdk.transform.decode.SourceDecoderFactory; +import org.apache.inlong.sdk.transform.encode.SinkEncoderFactory; +import org.apache.inlong.sdk.transform.pojo.TransformConfig; +import org.apache.inlong.sdk.transform.process.TransformProcessor; + +import org.junit.Assert; +import org.junit.Test; + +import java.util.HashMap; +import java.util.List; + +public class TestJsonQuoteFunction extends AbstractFunctionStringTestBase { + + @Test + public void testJsonQuoteFunction() throws Exception { + String transformSql = null, data = null; + TransformConfig config = null; + TransformProcessor<String, String> processor = null; + List<String> output = null; + + transformSql = "select json_quote(string1) from source"; + config = new TransformConfig(transformSql); + processor = TransformProcessor + .create(config, SourceDecoderFactory.createCsvDecoder(csvSource), + SinkEncoderFactory.createKvEncoder(kvSink)); + // case1: json_quote('Hello, World!') + data = "Hello, World!|xxd|cloud|7|3|3"; + output = processor.transform(data, new HashMap<>()); + Assert.assertEquals(1, output.size()); + Assert.assertEquals("result=\"Hello, World!\"", output.get(0)); + + // case2: json_quote('This is a "quoted" string') + data = "This is a \"quoted\" string|xxd|cloud|7|3|3"; + output = processor.transform(data, new HashMap<>()); + Assert.assertEquals(1, output.size()); + Assert.assertEquals("result=\"This is a quoted string\"", output.get(0)); + + // case3: json_quote('A back\slash:') + data = "A back\\slash:|xxd|cloud|7|3|3"; + output = processor.transform(data, new HashMap<>()); + Assert.assertEquals(1, output.size()); + Assert.assertEquals("result=\"A backslash:\"", output.get(0)); + + // case4: json_quote('Column1\tColumn2) + data = "Column1\tColumn2|xxd|cloud|7|3|3"; + output = processor.transform(data, new HashMap<>()); + Assert.assertEquals(1, output.size()); + Assert.assertEquals("result=\"Column1\\tColumn2\"", output.get(0)); + + // case5: json_quote('Quotes ' and double quotes \"') + data = "Quotes ' and double quotes \\\"|xxd|cloud|7|3|3"; + output = processor.transform(data, new HashMap<>()); + Assert.assertEquals(1, output.size()); + Assert.assertEquals("result=\"Quotes ' and double quotes \\\"\"", output.get(0)); + + // case6: json_quote(null) + data = "|xxd|cloud|7|3|3"; + output = processor.transform(data, new HashMap<>()); + Assert.assertEquals(1, output.size()); + Assert.assertEquals("result=\"\"", output.get(0)); + + // case7: json_quote('Complex string with / and \\') + data = "Complex string with / and \\\\"; + output = processor.transform(data, new HashMap<>()); + Assert.assertEquals(1, output.size()); + Assert.assertEquals("result=\"Complex string with / and \\\\\"", output.get(0)); + + // case8: json_quote('Unicode test: ሴ噸') + data = "Unicode test: ሴ噸|xxd|cloud|7|3|3"; + output = processor.transform(data, new HashMap<>()); + Assert.assertEquals(1, output.size()); + Assert.assertEquals("result=\"Unicode test: ሴ噸\"", output.get(0)); + + transformSql = "select json_quote(xxd) from source"; + config = new TransformConfig(transformSql); + processor = TransformProcessor + .create(config, SourceDecoderFactory.createCsvDecoder(csvSource), + SinkEncoderFactory.createKvEncoder(kvSink)); + // case9: json_quote() + data = "|xxd|cloud|7|3|3"; + output = processor.transform(data, new HashMap<>()); + Assert.assertEquals(1, output.size()); + Assert.assertEquals("result=", output.get(0)); + } + + @Test + public void testJsonStringFunction() throws Exception { + String transformSql = null, data = null; + TransformConfig config = null; + TransformProcessor<String, String> processor = null; + List<String> output = null; + + transformSql = "select json_string(string1) from source"; + config = new TransformConfig(transformSql); + processor = TransformProcessor + .create(config, SourceDecoderFactory.createCsvDecoder(csvSource), + SinkEncoderFactory.createKvEncoder(kvSink)); + // case1: json_string('true') + data = "true|xxd|cloud|7|3|3"; + output = processor.transform(data, new HashMap<>()); + Assert.assertEquals(1, output.size()); + Assert.assertEquals("result=\"true\"", output.get(0)); + + // case2: json_string('This is a "quoted" string') + data = "This is a \"quoted\" string|xxd|cloud|7|3|3"; + output = processor.transform(data, new HashMap<>()); + Assert.assertEquals(1, output.size()); + Assert.assertEquals("result=\"This is a quoted string\"", output.get(0)); + + // case3: json_string('A back\slash:') + data = "A back\\slash:|xxd|cloud|7|3|3"; + output = processor.transform(data, new HashMap<>()); + Assert.assertEquals(1, output.size()); + Assert.assertEquals("result=\"A backslash:\"", output.get(0)); + + // case4: json_string('Column1\tColumn2) + data = "Column1\tColumn2|xxd|cloud|7|3|3"; + output = processor.transform(data, new HashMap<>()); + Assert.assertEquals(1, output.size()); + Assert.assertEquals("result=\"Column1\\tColumn2\"", output.get(0)); + + // case5: json_string('Quotes ' and double quotes \"') + data = "Quotes ' and double quotes \\\"|xxd|cloud|7|3|3"; + output = processor.transform(data, new HashMap<>()); + Assert.assertEquals(1, output.size()); + Assert.assertEquals("result=\"Quotes ' and double quotes \\\"\"", output.get(0)); + + // case6: json_string(null) + data = "|xxd|cloud|7|3|3"; + output = processor.transform(data, new HashMap<>()); + Assert.assertEquals(1, output.size()); + Assert.assertEquals("result=\"\"", output.get(0)); + + // case7: json_string('Complex string with / and \\') + data = "Complex string with / and \\\\"; + output = processor.transform(data, new HashMap<>()); + Assert.assertEquals(1, output.size()); + Assert.assertEquals("result=\"Complex string with / and \\\\\"", output.get(0)); + + // case8: json_string('Unicode test: ሴ噸') + data = "Unicode test: ሴ噸|xxd|cloud|7|3|3"; + output = processor.transform(data, new HashMap<>()); + Assert.assertEquals(1, output.size()); + Assert.assertEquals("result=\"Unicode test: ሴ噸\"", output.get(0)); + + transformSql = "select json_quote(xxd) from source"; + config = new TransformConfig(transformSql); + processor = TransformProcessor + .create(config, SourceDecoderFactory.createCsvDecoder(csvSource), + SinkEncoderFactory.createKvEncoder(kvSink)); + // case9: json_string() + data = "|xxd|cloud|7|3|3"; + output = processor.transform(data, new HashMap<>()); + Assert.assertEquals(1, output.size()); + Assert.assertEquals("result=", output.get(0)); + } +} diff --git a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestJsonUnQuoteFunction.java b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestJsonUnQuoteFunction.java new file mode 100644 index 0000000000..98c1774715 --- /dev/null +++ b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestJsonUnQuoteFunction.java @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.inlong.sdk.transform.process.function.string; + +import org.apache.inlong.sdk.transform.decode.SourceDecoderFactory; +import org.apache.inlong.sdk.transform.encode.SinkEncoderFactory; +import org.apache.inlong.sdk.transform.pojo.TransformConfig; +import org.apache.inlong.sdk.transform.process.TransformProcessor; + +import org.junit.Assert; +import org.junit.Test; + +import java.util.HashMap; +import java.util.List; + +public class TestJsonUnQuoteFunction extends AbstractFunctionStringTestBase { + + @Test + public void testJsonUnQuoteFunction() throws Exception { + String transformSql = null, data = null; + TransformConfig config = null; + TransformProcessor<String, String> processor = null; + List<String> output = null; + + transformSql = "select json_unquote(string1) from source"; + config = new TransformConfig(transformSql); + processor = TransformProcessor + .create(config, SourceDecoderFactory.createCsvDecoder(csvSource), + SinkEncoderFactory.createKvEncoder(kvSink)); + // case1: json_unquote('Hello, World!') + data = "\"Hello, World!\"|xxd|cloud|7|3|3"; + output = processor.transform(data, new HashMap<>()); + Assert.assertEquals(1, output.size()); + Assert.assertEquals("result=Hello, World!", output.get(0)); + + // case2: json_unquote('3.5') + data = "\"This is a 'quoted' string\"|xxd|cloud|7|3|3"; + output = processor.transform(data, new HashMap<>()); + Assert.assertEquals(1, output.size()); + Assert.assertEquals("result=This is a 'quoted' string", output.get(0)); + + // case3: is_digit('35') + data = "\"A back\\slash:\"|xxd|cloud|7|3|3"; + output = processor.transform(data, new HashMap<>()); + Assert.assertEquals(1, output.size()); + Assert.assertEquals("result=A back\\slash:", output.get(0)); + + // case4: is_digit('') + data = "\"Column1\tColumn2\"|xxd|cloud|7|3|3"; + output = processor.transform(data, new HashMap<>()); + Assert.assertEquals(1, output.size()); + Assert.assertEquals("result=Column1\tColumn2", output.get(0)); + + // case4: is_digit('') + data = "\"Quotes ' and double quotes \\\"\\\"|xxd|cloud|7|3|3"; + output = processor.transform(data, new HashMap<>()); + Assert.assertEquals(1, output.size()); + Assert.assertEquals("result=Quotes ' and double quotes \\\"", output.get(0)); + + // case4: is_digit('') + data = "\"Complex string with / and \\\"|xxd|cloud|7|3|3"; + output = processor.transform(data, new HashMap<>()); + Assert.assertEquals(1, output.size()); + Assert.assertEquals("result=Complex string with / and \\", output.get(0)); + + // case4: is_digit('') + data = "\"Unicode test: ሴ噸\"|xxd|cloud|7|3|3"; + output = processor.transform(data, new HashMap<>()); + Assert.assertEquals(1, output.size()); + Assert.assertEquals("result=Unicode test: ሴ噸", output.get(0)); + + transformSql = "select json_unquote(xxd) from source"; + config = new TransformConfig(transformSql); + processor = TransformProcessor + .create(config, SourceDecoderFactory.createCsvDecoder(csvSource), + SinkEncoderFactory.createKvEncoder(kvSink)); + // case5: json_unquote() + data = "|xxd|cloud|7|3|3"; + output = processor.transform(data, new HashMap<>()); + Assert.assertEquals(1, output.size()); + Assert.assertEquals("result=", output.get(0)); + } +}