[ https://issues.apache.org/jira/browse/FLINK-8301?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16302803#comment-16302803 ]
ASF GitHub Bot commented on FLINK-8301: --------------------------------------- Github user sunjincheng121 commented on a diff in the pull request: https://github.com/apache/flink/pull/5203#discussion_r158598514 --- Diff: flink-libraries/flink-table/src/test/scala/org/apache/flink/table/runtime/batch/sql/CalcITCase.scala --- @@ -352,6 +353,72 @@ class CalcITCase( val results = result.toDataSet[Row].collect() TestBaseUtils.compareResultAsText(results.asJava, expected) } + + @Test + def testDeterministicUdfWithUnicodeParameter(): Unit = { + val data = new mutable.MutableList[(String, String, String)] + data.+=((null, null, null)) + + val env = ExecutionEnvironment.getExecutionEnvironment + + val tEnv = TableEnvironment.getTableEnvironment(env) + + val udf0 = new LiteralUDF("\"\\", deterministic = true) + val udf1 = new LiteralUDF("\u0001xyz", deterministic = true) + val udf2 = new LiteralUDF("\u0001\u0012", deterministic = true) + + tEnv.registerFunction("udf0", udf0) + tEnv.registerFunction("udf1", udf1) + tEnv.registerFunction("udf2", udf2) + + // user have to specify '\' with '\\' in SQL + val sqlQuery = "SELECT " + + "udf0('\"\\\\') as str1, " + + "udf1('\u0001xyz') as str2, " + + "udf2('\u0001\u0012') as str3 from T1" + + val t1 = env.fromCollection(data).toTable(tEnv, 'str1, 'str2, 'str3) + + tEnv.registerTable("T1", t1) + + val results = tEnv.sql(sqlQuery).toDataSet[Row].collect() + + val expected = List("\"\\,\u0001xyz,\u0001\u0012").mkString("\n") + TestBaseUtils.compareResultAsText(results.asJava, expected) + } + + @Test + def testNonDeterministicUdfWithUnicodeParameter(): Unit = { --- End diff -- To reduce the IT test time cost, I suggest merging "testDeterministicUdfWithUnicodeParameter" and "testNonDeterministicUdfWithUnicodeParameter" into one test case, i.e., we create two instances of each UDF, one for each value of the deterministic flag. Something like the following: ` val udf00 = new LiteralUDF("\"\\", deterministic = false) val udf01 = new LiteralUDF("\"\\", deterministic = true) ... 
` > Support Unicode in codegen for SQL && TableAPI > ---------------------------------------------- > > Key: FLINK-8301 > URL: https://issues.apache.org/jira/browse/FLINK-8301 > Project: Flink > Issue Type: Improvement > Components: Table API & SQL > Reporter: Ruidong Li > Assignee: Ruidong Li > > The current code generation does not support Unicode: "\u0001" will be > generated as "\\u0001", so a function call like concat(str, "\u0001") will lead to > a wrong result. > This issue intends to handle char/varchar literals correctly; some examples > follow below. > literal: '\u0001abc' -> codegen: "\u0001abc" > literal: '\u0022\' -> codegen: "\"\\" -- This message was sent by Atlassian JIRA (v6.4.14#64029)