This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new c07e893d87e [improve](udf)add some check for udf when result is null 
(#51084)
c07e893d87e is described below

commit c07e893d87ec6689ff2ed8f3d2ede0e8ce3e25a6
Author: zhangstar333 <[email protected]>
AuthorDate: Thu May 22 17:52:42 2025 +0800

    [improve](udf)add some check for udf when result is null (#51084)
    
    ### What problem does this PR solve?
    Problem Summary:
    
    Sometimes a user creates a UDF with "always_nullable"="false",
    but the UDF's logic can still return null.
    Add checks so that the user gets a clearer error message in that case.
---
 .../apache/doris/common/jni/vec/VectorColumn.java  | 47 ++++++++++++++++++++++
 .../main/java/org/apache/doris/udf/StringTest.java |  3 ++
 .../suites/javaudf_p0/test_javaudf_array.groovy    | 12 +++++-
 .../suites/javaudf_p0/test_javaudf_int.groovy      | 26 ++++++++++++
 .../suites/javaudf_p0/test_javaudf_string.groovy   | 13 ++++++
 5 files changed, 100 insertions(+), 1 deletion(-)

diff --git 
a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/vec/VectorColumn.java
 
b/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/vec/VectorColumn.java
index d2f2e42f866..46d99191adb 100644
--- 
a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/vec/VectorColumn.java
+++ 
b/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/vec/VectorColumn.java
@@ -350,6 +350,16 @@ public class VectorColumn {
         }
     }
 
+    public void checkNullable(Object[] batch, int rows) {
+        for (int i = 0; i < rows; ++i) {
+            if (batch[i] == null) {
+                throw new RuntimeException(
+                        "the result of " + i + " row is null, but the return 
type is not nullable, please check "
+                                + "the always_nullable property in create 
function statement, it's should be true");
+            }
+        }
+    }
+
     public final boolean isNullAt(int rowId) {
         if (numNulls == 0 || nullMap == 0) {
             return false;
@@ -454,6 +464,7 @@ public class VectorColumn {
             }
             OffHeap.UNSAFE.copyMemory(batchNulls, OffHeap.BYTE_ARRAY_OFFSET, 
null, nullMap + appendIndex, rows);
         } else {
+            checkNullable(batch, rows);
             for (int i = 0; i < rows; ++i) {
                 batchData[i] = (byte) (batch[i] ? 1 : 0);
             }
@@ -511,6 +522,7 @@ public class VectorColumn {
             }
             OffHeap.UNSAFE.copyMemory(batchNulls, OffHeap.BYTE_ARRAY_OFFSET, 
null, nullMap + appendIndex, rows);
         } else {
+            checkNullable(batch, rows);
             for (int i = 0; i < rows; ++i) {
                 batchData[i] = batch[i];
             }
@@ -568,6 +580,7 @@ public class VectorColumn {
             }
             OffHeap.UNSAFE.copyMemory(batchNulls, OffHeap.BYTE_ARRAY_OFFSET, 
null, nullMap + appendIndex, rows);
         } else {
+            checkNullable(batch, rows);
             for (int i = 0; i < rows; ++i) {
                 batchData[i] = batch[i];
             }
@@ -625,6 +638,7 @@ public class VectorColumn {
             }
             OffHeap.UNSAFE.copyMemory(batchNulls, OffHeap.BYTE_ARRAY_OFFSET, 
null, nullMap + appendIndex, rows);
         } else {
+            checkNullable(batch, rows);
             for (int i = 0; i < rows; ++i) {
                 batchData[i] = batch[i];
             }
@@ -682,6 +696,7 @@ public class VectorColumn {
             }
             OffHeap.UNSAFE.copyMemory(batchNulls, OffHeap.BYTE_ARRAY_OFFSET, 
null, nullMap + appendIndex, rows);
         } else {
+            checkNullable(batch, rows);
             for (int i = 0; i < rows; ++i) {
                 batchData[i] = batch[i];
             }
@@ -739,6 +754,7 @@ public class VectorColumn {
             }
             OffHeap.UNSAFE.copyMemory(batchNulls, OffHeap.BYTE_ARRAY_OFFSET, 
null, nullMap + appendIndex, rows);
         } else {
+            checkNullable(batch, rows);
             for (int i = 0; i < rows; ++i) {
                 batchData[i] = batch[i];
             }
@@ -796,6 +812,7 @@ public class VectorColumn {
             }
             OffHeap.UNSAFE.copyMemory(batchNulls, OffHeap.BYTE_ARRAY_OFFSET, 
null, nullMap + appendIndex, rows);
         } else {
+            checkNullable(batch, rows);
             for (int i = 0; i < rows; ++i) {
                 batchData[i] = batch[i];
             }
@@ -837,6 +854,9 @@ public class VectorColumn {
     }
 
     public void appendBigInteger(BigInteger[] batch, boolean isNullable) {
+        if (!isNullable) {
+            checkNullable(batch, batch.length);
+        }
         reserve(appendIndex + batch.length);
         for (BigInteger v : batch) {
             if (v == null) {
@@ -904,6 +924,9 @@ public class VectorColumn {
     }
 
     public void appendInetAddress(InetAddress[] batch, boolean isNullable) {
+        if (!isNullable) {
+            checkNullable(batch, batch.length);
+        }
         reserve(appendIndex + batch.length);
         for (InetAddress v : batch) {
             if (v == null) {
@@ -933,6 +956,9 @@ public class VectorColumn {
     }
 
     public void appendDecimal(BigDecimal[] batch, boolean isNullable) {
+        if (!isNullable) {
+            checkNullable(batch, batch.length);
+        }
         reserve(appendIndex + batch.length);
         for (BigDecimal v : batch) {
             if (v == null) {
@@ -979,6 +1005,9 @@ public class VectorColumn {
     }
 
     public void appendDate(LocalDate[] batch, boolean isNullable) {
+        if (!isNullable) {
+            checkNullable(batch, batch.length);
+        }
         reserve(appendIndex + batch.length);
         for (LocalDate v : batch) {
             if (v == null) {
@@ -1045,6 +1074,9 @@ public class VectorColumn {
     }
 
     public void appendDateTime(LocalDateTime[] batch, boolean isNullable) {
+        if (!isNullable) {
+            checkNullable(batch, batch.length);
+        }
         reserve(appendIndex + batch.length);
         for (LocalDateTime v : batch) {
             if (v == null) {
@@ -1146,6 +1178,9 @@ public class VectorColumn {
     }
 
     public void appendStringAndOffset(String[] batch, boolean isNullable) {
+        if (!isNullable) {
+            checkNullable(batch, batch.length);
+        }
         reserve(appendIndex + batch.length);
         for (String v : batch) {
             byte[] bytes;
@@ -1162,6 +1197,9 @@ public class VectorColumn {
     }
 
     public void appendBinaryAndOffset(byte[][] batch, boolean isNullable) {
+        if (!isNullable) {
+            checkNullable(batch, batch.length);
+        }
         reserve(appendIndex + batch.length);
         for (byte[] v : batch) {
             byte[] bytes = v;
@@ -1215,6 +1253,9 @@ public class VectorColumn {
     }
 
     public void appendArray(List<Object>[] batch, boolean isNullable) {
+        if (!isNullable) {
+            checkNullable(batch, batch.length);
+        }
         reserve(appendIndex + batch.length);
         int offset = childColumns[0].appendIndex;
         for (List<Object> v : batch) {
@@ -1275,6 +1316,9 @@ public class VectorColumn {
     }
 
     public void appendMap(Map<Object, Object>[] batch, boolean isNullable) {
+        if (!isNullable) {
+            checkNullable(batch, batch.length);
+        }
         reserve(appendIndex + batch.length);
         int offset = childColumns[0].appendIndex;
         for (Map<Object, Object> v : batch) {
@@ -1341,6 +1385,9 @@ public class VectorColumn {
     }
 
     public void appendStruct(Map<String, Object>[] batch, boolean isNullable) {
+        if (!isNullable) {
+            checkNullable(batch, batch.length);
+        }
         reserve(appendIndex + batch.length);
         Object[][] columnData = new Object[childColumns.length][];
         
Preconditions.checkArgument(this.getColumnType().getChildNames().size() == 
childColumns.length);
diff --git 
a/regression-test/java-udf-src/src/main/java/org/apache/doris/udf/StringTest.java
 
b/regression-test/java-udf-src/src/main/java/org/apache/doris/udf/StringTest.java
index cc1a6a2bca7..822c484c706 100644
--- 
a/regression-test/java-udf-src/src/main/java/org/apache/doris/udf/StringTest.java
+++ 
b/regression-test/java-udf-src/src/main/java/org/apache/doris/udf/StringTest.java
@@ -22,6 +22,9 @@ import org.apache.hadoop.hive.ql.exec.UDF;
 
 public class StringTest extends UDF {
     public String evaluate(String field, Integer a, Integer b) {
+        if (field == null || a == null || b == null) {
+            return null;
+        }
         return field.substring(0, a) + StringUtils.repeat("*", field.length() 
- a -b) + field.substring(field.length()-b);
     }
 }
diff --git a/regression-test/suites/javaudf_p0/test_javaudf_array.groovy 
b/regression-test/suites/javaudf_p0/test_javaudf_array.groovy
index 0d782c036b4..4cae4e872f9 100644
--- a/regression-test/suites/javaudf_p0/test_javaudf_array.groovy
+++ b/regression-test/suites/javaudf_p0/test_javaudf_array.groovy
@@ -124,9 +124,19 @@ suite("test_javaudf_array") {
             "type"="JAVA_UDF"
         ); """
         qt_select_14 """ SELECT java_udf_array_list_test(array(string_col)), 
string_col, tinyint_col as result FROM ${tableName} ORDER BY result; """
-
+        sql """ CREATE FUNCTION 
java_udf_array_list_test_not_nullable(array<string>) RETURNS array<string> 
PROPERTIES (
+            "file"="file://${jarPath}",
+            "symbol"="org.apache.doris.udf.ArrayListTest",
+            "always_nullable"="false",
+            "type"="JAVA_UDF"
+        ); """
+        test {
+            sql """ SELECT java_udf_array_list_test_not_nullable(NULL); """
+            exception "but the return type is not nullable"
+        }
     } finally {
         try_sql("DROP FUNCTION IF EXISTS java_udf_array_int_test(array<int>);")
+        try_sql("DROP FUNCTION IF EXISTS 
java_udf_array_list_test_not_nullable(array<string>);")
         try_sql("DROP FUNCTION IF EXISTS 
java_udf_array_return_int_test(array<int>);")
         try_sql("DROP FUNCTION IF EXISTS 
java_udf_array_return_string_test(array<string>);")
         try_sql("DROP FUNCTION IF EXISTS 
java_udf_array_string_test(array<string>);")
diff --git a/regression-test/suites/javaudf_p0/test_javaudf_int.groovy 
b/regression-test/suites/javaudf_p0/test_javaudf_int.groovy
index cb9b87b7bf0..7433866a9f2 100644
--- a/regression-test/suites/javaudf_p0/test_javaudf_int.groovy
+++ b/regression-test/suites/javaudf_p0/test_javaudf_int.groovy
@@ -123,6 +123,30 @@ suite("test_javaudf_int") {
         qt_select_global_3 """ SELECT java_udf_int_test_global(3) result FROM 
${tableName} ORDER BY result; """
         qt_select_global_4 """ SELECT abs(java_udf_int_test_global(3)) result 
FROM ${tableName} ORDER BY result; """
 
+        sql """ CREATE FUNCTION java_udf_int_test_not_nullable(int) RETURNS 
int PROPERTIES (
+            "file"="file://${jarPath}",
+            "symbol"="org.apache.doris.udf.IntTest",
+            "always_nullable"="false",
+            "type"="JAVA_UDF"
+        ); """
+
+        test {
+            sql """ SELECT java_udf_int_test_not_nullable(NULL); """
+            exception "but the return type is not nullable"
+        }
+
+        sql """ CREATE FUNCTION java_udf_largeint_test_not_nullable(largeint) 
RETURNS largeint PROPERTIES (
+            "file"="file://${jarPath}",
+            "symbol"="org.apache.doris.udf.LargeintTest",
+            "always_nullable"="false",
+            "type"="JAVA_UDF"
+        ); """
+
+        test {
+            sql """ SELECT java_udf_largeint_test_not_nullable(NULL); """
+            exception "but the return type is not nullable"
+        }
+
     } finally {
         try_sql("DROP GLOBAL FUNCTION IF EXISTS 
java_udf_int_test_global(int);")
         try_sql("DROP FUNCTION IF EXISTS java_udf_tinyint_test(tinyint);")
@@ -130,6 +154,8 @@ suite("test_javaudf_int") {
         try_sql("DROP FUNCTION IF EXISTS java_udf_bigint_test(bigint);")
         try_sql("DROP FUNCTION IF EXISTS java_udf_largeint_test(largeint);")
         try_sql("DROP FUNCTION IF EXISTS java_udf_int_test(int);")
+        try_sql("DROP FUNCTION IF EXISTS java_udf_int_test_not_nullable(int);")
+        try_sql("DROP FUNCTION IF EXISTS 
java_udf_largeint_test_not_nullable(largeint);")
         try_sql("DROP TABLE IF EXISTS ${tableName}")
     }
 }
diff --git a/regression-test/suites/javaudf_p0/test_javaudf_string.groovy 
b/regression-test/suites/javaudf_p0/test_javaudf_string.groovy
index 48e98b0c5b6..2158c50e432 100644
--- a/regression-test/suites/javaudf_p0/test_javaudf_string.groovy
+++ b/regression-test/suites/javaudf_p0/test_javaudf_string.groovy
@@ -114,8 +114,21 @@ suite("test_javaudf_string") {
         }
         sql """  insert into tbl1 select random()%10000 * 10000, "5" from 
tbl1;"""
         qt_select_5 """ select count(0) from (select k1, max(k2) as k2 from 
tbl1 group by k1)v where java_udf_string_test(k2, 0, 1) = "asd" """;
+
+        sql """ CREATE FUNCTION java_udf_string_test_not_nullabel(string, int, 
int) RETURNS string PROPERTIES (
+            "file"="file://${jarPath}",
+            "symbol"="org.apache.doris.udf.StringTest",
+            "always_nullable"="false",
+            "type"="JAVA_UDF"
+        ); """
+
+        test {
+            sql """ SELECT java_udf_string_test_not_nullabel(NULL,NULL,NULL); 
"""
+            exception "but the return type is not nullable"
+        }
     } finally {
         try_sql("DROP FUNCTION IF EXISTS java_udf_string_test(string, int, 
int);")
+        try_sql("DROP FUNCTION IF EXISTS 
java_udf_string_test_not_nullabel(string, int, int);")
         try_sql("DROP TABLE IF EXISTS ${tableName}")
         try_sql("DROP TABLE IF EXISTS tbl1")
         try_sql("DROP TABLE IF EXISTS test_javaudf_string_2")


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to