This is an automated email from the ASF dual-hosted git repository. lihaopeng pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 5291f14aff [vectorized](udf) java udf support array type (#16841) 5291f14aff is described below commit 5291f14aff410c46af6838677003873d6121d8ba Author: zhangstar333 <87313068+zhangstar...@users.noreply.github.com> AuthorDate: Mon Feb 20 10:00:25 2023 +0800 [vectorized](udf) java udf support array type (#16841) --- be/src/vec/functions/function_java_udf.cpp | 101 +++- be/src/vec/functions/function_java_udf.h | 12 + .../ecosystem/udf/java-user-defined-function.md | 2 + .../ecosystem/udf/java-user-defined-function.md | 2 + .../main/java/org/apache/doris/catalog/Type.java | 1 + .../apache/doris/analysis/CreateFunctionStmt.java | 17 +- .../java/org/apache/doris/udf/BaseExecutor.java | 658 ++++++++++++++++++++- .../java/org/apache/doris/udf/UdfExecutor.java | 9 +- .../main/java/org/apache/doris/udf/UdfUtils.java | 35 +- gensrc/thrift/Types.thrift | 12 + .../data/javaudf_p0/test_javaudf_array.out | 133 +++++ .../java/org/apache/doris/udf/ArrayDateTest.java | 29 + .../org/apache/doris/udf/ArrayDateTimeTest.java | 29 + .../java/org/apache/doris/udf/ArrayIntTest.java | 38 ++ .../apache/doris/udf/ArrayReturnArrayIntTest.java | 40 ++ .../doris/udf/ArrayReturnArrayStringTest.java | 41 ++ .../java/org/apache/doris/udf/ArrayStringTest.java | 39 ++ .../suites/javaudf_p0/test_javaudf_array.groovy | 122 ++++ run-regression-test.sh | 5 +- 19 files changed, 1293 insertions(+), 32 deletions(-) diff --git a/be/src/vec/functions/function_java_udf.cpp b/be/src/vec/functions/function_java_udf.cpp index d5e3751ef5..abffb8d8bc 100644 --- a/be/src/vec/functions/function_java_udf.cpp +++ b/be/src/vec/functions/function_java_udf.cpp @@ -27,6 +27,8 @@ #include "runtime/exec_env.h" #include "runtime/user_function_cache.h" #include "util/jni-util.h" +#include "vec/columns/column_array.h" +#include "vec/columns/column_nullable.h" #include "vec/columns/column_vector.h" #include "vec/core/block.h" #include "vec/data_types/data_type_bitmap.h" @@ -78,9 +80,16 @@ Status JavaFunctionCall::prepare(FunctionContext* context, ctor_params.__set_input_offsets_ptrs((int64_t)jni_ctx->input_offsets_ptrs.get()); ctor_params.__set_input_buffer_ptrs((int64_t)jni_ctx->input_values_buffer_ptr.get()); ctor_params.__set_input_nulls_ptrs((int64_t)jni_ctx->input_nulls_buffer_ptr.get()); + ctor_params.__set_input_array_nulls_buffer_ptr( + (int64_t)jni_ctx->input_array_nulls_buffer_ptr.get()); + ctor_params.__set_input_array_string_offsets_ptrs( + (int64_t)jni_ctx->input_array_string_offsets_ptrs.get()); ctor_params.__set_output_buffer_ptr((int64_t)jni_ctx->output_value_buffer.get()); ctor_params.__set_output_null_ptr((int64_t)jni_ctx->output_null_value.get()); ctor_params.__set_output_offsets_ptr((int64_t)jni_ctx->output_offsets_ptr.get()); + ctor_params.__set_output_array_null_ptr((int64_t)jni_ctx->output_array_null_ptr.get()); + ctor_params.__set_output_array_string_offsets_ptr( + (int64_t)jni_ctx->output_array_string_offsets_ptr.get()); ctor_params.__set_output_intermediate_state_ptr( (int64_t)jni_ctx->output_intermediate_state_ptr.get()); ctor_params.__set_batch_size_ptr((int64_t)jni_ctx->batch_size_ptr.get()); @@ -142,6 +151,31 @@ Status JavaFunctionCall::execute(FunctionContext* context, Block& block, } else if (data_cols[arg_idx]->is_numeric() || data_cols[arg_idx]->is_column_decimal()) { jni_ctx->input_values_buffer_ptr.get()[arg_idx] = reinterpret_cast<int64_t>(data_cols[arg_idx]->get_raw_data().data); + } else if (data_cols[arg_idx]->is_column_array()) { + const ColumnArray* array_col = + assert_cast<const ColumnArray*>(data_cols[arg_idx].get()); + jni_ctx->input_offsets_ptrs.get()[arg_idx] = + reinterpret_cast<int64_t>(array_col->get_offsets_column().get_raw_data().data); + const ColumnNullable& array_nested_nullable = + assert_cast<const ColumnNullable&>(array_col->get_data()); + auto data_column_null_map = array_nested_nullable.get_null_map_column_ptr(); + auto data_column = array_nested_nullable.get_nested_column_ptr(); + jni_ctx->input_array_nulls_buffer_ptr.get()[arg_idx] = reinterpret_cast<int64_t>( + check_and_get_column<ColumnVector<UInt8>>(data_column_null_map) + ->get_data() + .data()); + + //need pass FE, nullamp and offset, chars + if (data_column->is_column_string()) { + const ColumnString* col = assert_cast<const ColumnString*>(data_column.get()); + jni_ctx->input_values_buffer_ptr.get()[arg_idx] = + reinterpret_cast<int64_t>(col->get_chars().data()); + jni_ctx->input_array_string_offsets_ptrs.get()[arg_idx] = + reinterpret_cast<int64_t>(col->get_offsets().data()); + } else { + jni_ctx->input_values_buffer_ptr.get()[arg_idx] = + reinterpret_cast<int64_t>(data_column->get_raw_data().data); + } } else { return Status::InvalidArgument( strings::Substitute("Java UDF doesn't support type $0 now !", @@ -155,7 +189,6 @@ Status JavaFunctionCall::execute(FunctionContext* context, Block& block, auto null_type = std::reinterpret_pointer_cast<const DataTypeNullable>(return_type); auto data_col = null_type->get_nested_type()->create_column(); auto null_col = ColumnUInt8::create(data_col->size(), 0); - null_col->reserve(num_rows); null_col->resize(num_rows); *(jni_ctx->output_null_value) = reinterpret_cast<int64_t>(null_col->get_data().data()); @@ -168,9 +201,7 @@ Status JavaFunctionCall::execute(FunctionContext* context, Block& block, const_cast<ColumnString::Offsets&>(str_col->get_offsets()); \ int increase_buffer_size = 0; \ int32_t buffer_size = JniUtil::IncreaseReservedBufferSize(increase_buffer_size); \ - chars.reserve(buffer_size); \ chars.resize(buffer_size); \ - offsets.reserve(num_rows); \ offsets.resize(num_rows); \ *(jni_ctx->output_value_buffer) = reinterpret_cast<int64_t>(chars.data()); \ *(jni_ctx->output_offsets_ptr) = reinterpret_cast<int64_t>(offsets.data()); \ @@ -188,12 +219,74 @@ Status JavaFunctionCall::execute(FunctionContext* context, Block& block, nullptr); \ } \ } else if (data_col->is_numeric() || data_col->is_column_decimal()) { \ - data_col->reserve(num_rows); \ data_col->resize(num_rows); \ *(jni_ctx->output_value_buffer) = \ reinterpret_cast<int64_t>(data_col->get_raw_data().data); \ env->CallNonvirtualVoidMethodA(jni_ctx->executor, executor_cl_, executor_evaluate_id_, \ nullptr); \ + } else if (data_col->is_column_array()) { \ + ColumnArray* array_col = assert_cast<ColumnArray*>(data_col.get()); \ + ColumnNullable& array_nested_nullable = \ + assert_cast<ColumnNullable&>(array_col->get_data()); \ + auto data_column_null_map = array_nested_nullable.get_null_map_column_ptr(); \ + auto data_column = array_nested_nullable.get_nested_column_ptr(); \ + auto& offset_column = array_col->get_offsets_column(); \ + int increase_buffer_size = 0; \ + int32_t buffer_size = JniUtil::IncreaseReservedBufferSize(increase_buffer_size); \ + offset_column.resize(num_rows); \ + *(jni_ctx->output_offsets_ptr) = \ + reinterpret_cast<int64_t>(offset_column.get_raw_data().data); \ + data_column_null_map->resize(buffer_size); \ + auto& null_map_data = \ + assert_cast<ColumnVector<UInt8>*>(data_column_null_map.get())->get_data(); \ + *(jni_ctx->output_array_null_ptr) = reinterpret_cast<int64_t>(null_map_data.data()); \ + jni_ctx->output_intermediate_state_ptr->row_idx = 0; \ + jni_ctx->output_intermediate_state_ptr->buffer_size = buffer_size; \ + if (data_column->is_column_string()) { \ + ColumnString* str_col = assert_cast<ColumnString*>(data_column.get()); \ + ColumnString::Chars& chars = assert_cast<ColumnString::Chars&>(str_col->get_chars()); \ + ColumnString::Offsets& offsets = \ + assert_cast<ColumnString::Offsets&>(str_col->get_offsets()); \ + chars.resize(buffer_size); \ + offsets.resize(buffer_size); \ + *(jni_ctx->output_value_buffer) = reinterpret_cast<int64_t>(chars.data()); \ + *(jni_ctx->output_array_string_offsets_ptr) = \ + reinterpret_cast<int64_t>(offsets.data()); \ + env->CallNonvirtualVoidMethodA(jni_ctx->executor, executor_cl_, executor_evaluate_id_, \ + nullptr); \ + while (jni_ctx->output_intermediate_state_ptr->row_idx < num_rows) { \ + increase_buffer_size++; \ + buffer_size = JniUtil::IncreaseReservedBufferSize(increase_buffer_size); \ + null_map_data.resize(buffer_size); \ + chars.resize(buffer_size); \ + offsets.resize(buffer_size); \ + *(jni_ctx->output_array_null_ptr) = \ + reinterpret_cast<int64_t>(null_map_data.data()); \ + *(jni_ctx->output_value_buffer) = reinterpret_cast<int64_t>(chars.data()); \ + jni_ctx->output_intermediate_state_ptr->buffer_size = buffer_size; \ + env->CallNonvirtualVoidMethodA(jni_ctx->executor, executor_cl_, \ + executor_evaluate_id_, nullptr); \ + } \ + } else { \ + data_column->resize(buffer_size); \ + *(jni_ctx->output_value_buffer) = \ + reinterpret_cast<int64_t>(data_column->get_raw_data().data); \ + env->CallNonvirtualVoidMethodA(jni_ctx->executor, executor_cl_, executor_evaluate_id_, \ + nullptr); \ + while (jni_ctx->output_intermediate_state_ptr->row_idx < num_rows) { \ + increase_buffer_size++; \ + buffer_size = JniUtil::IncreaseReservedBufferSize(increase_buffer_size); \ + null_map_data.resize(buffer_size); \ + data_column->resize(buffer_size); \ + *(jni_ctx->output_array_null_ptr) = \ + reinterpret_cast<int64_t>(null_map_data.data()); \ + *(jni_ctx->output_value_buffer) = \ + reinterpret_cast<int64_t>(data_column->get_raw_data().data); \ + jni_ctx->output_intermediate_state_ptr->buffer_size = buffer_size; \ + env->CallNonvirtualVoidMethodA(jni_ctx->executor, executor_cl_, \ + executor_evaluate_id_, nullptr); \ + } \ + } \ } else { \ return Status::InvalidArgument(strings::Substitute( \ "Java UDF doesn't support return type $0 now !", return_type->get_name())); \ diff --git a/be/src/vec/functions/function_java_udf.h b/be/src/vec/functions/function_java_udf.h index 9d9a4f8062..7ffa456d82 100644 --- a/be/src/vec/functions/function_java_udf.h +++ b/be/src/vec/functions/function_java_udf.h @@ -89,9 +89,17 @@ private: std::unique_ptr<int64_t[]> input_values_buffer_ptr; std::unique_ptr<int64_t[]> input_nulls_buffer_ptr; std::unique_ptr<int64_t[]> input_offsets_ptrs; + //used for array type nested column null map, because array nested column must be nullable + std::unique_ptr<int64_t[]> input_array_nulls_buffer_ptr; + //used for array type of nested string column offset, not the array column offset + std::unique_ptr<int64_t[]> input_array_string_offsets_ptrs; std::unique_ptr<int64_t> output_value_buffer; std::unique_ptr<int64_t> output_null_value; std::unique_ptr<int64_t> output_offsets_ptr; + //used for array type nested column null map + std::unique_ptr<int64_t> output_array_null_ptr; + //used for array type of nested string column offset + std::unique_ptr<int64_t> output_array_string_offsets_ptr; std::unique_ptr<int32_t> batch_size_ptr; // intermediate_state includes two parts: reserved / used buffer size and rows std::unique_ptr<IntermediateState> output_intermediate_state_ptr; @@ -101,9 +109,13 @@ private: input_values_buffer_ptr(new int64_t[num_args]), input_nulls_buffer_ptr(new int64_t[num_args]), input_offsets_ptrs(new int64_t[num_args]), + input_array_nulls_buffer_ptr(new int64_t[num_args]), + input_array_string_offsets_ptrs(new int64_t[num_args]), output_value_buffer(new int64_t()), output_null_value(new int64_t()), output_offsets_ptr(new int64_t()), + output_array_null_ptr(new int64_t()), + output_array_string_offsets_ptr(new int64_t()), batch_size_ptr(new int32_t()), output_intermediate_state_ptr(new IntermediateState()) {} diff --git a/docs/en/docs/ecosystem/udf/java-user-defined-function.md b/docs/en/docs/ecosystem/udf/java-user-defined-function.md index c70a6c6a12..06c421f39c 100644 --- a/docs/en/docs/ecosystem/udf/java-user-defined-function.md +++ b/docs/en/docs/ecosystem/udf/java-user-defined-function.md @@ -58,7 +58,9 @@ Java UDF provides users with a Java interface written in UDF to facilitate the e |Datetime|LocalDateTime| |String|String| |Decimal|BigDecimal| +|```array<Type>```|```ArrayList<Type>```| +* Array types can nested basic types, Eg: In Doris: ```array<int>``` corresponds to JAVA UDF Argument Type: ```ArrayList<Integer>```, Others is also. ## Write UDF functions This section mainly introduces how to develop a Java UDF. Samples for the Java version are provided under `samples/doris-demo/java-udf-demo/` for your reference, Check it out [here](https://github.com/apache/incubator-doris/tree/master/samples/doris-demo/java-udf-demo) diff --git a/docs/zh-CN/docs/ecosystem/udf/java-user-defined-function.md b/docs/zh-CN/docs/ecosystem/udf/java-user-defined-function.md index 805e8ddddd..fa2fd6ec56 100644 --- a/docs/zh-CN/docs/ecosystem/udf/java-user-defined-function.md +++ b/docs/zh-CN/docs/ecosystem/udf/java-user-defined-function.md @@ -56,7 +56,9 @@ Java UDF 为用户提供UDF编写的Java接口,以方便用户使用Java语言 |Datetime|LocalDateTime| |String|String| |Decimal|BigDecimal| +|```array<Type>```|```ArrayList<Type>```| +* array类型可以嵌套基本类型,例如Doris: ```array<int>```对应JAVA UDF Argument Type: ```ArrayList<Integer>```, 其他依此类推 ## 编写 UDF 函数 本小节主要介绍如何开发一个 Java UDF。在 `samples/doris-demo/java-udf-demo/` 下提供了示例,可供参考,查看点击[这里](https://github.com/apache/doris/tree/master/samples/doris-demo/java-udf-demo) diff --git a/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java b/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java index 61e22fb32a..8829fe8687 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java +++ b/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java @@ -238,6 +238,7 @@ public abstract class Type { .put(PrimitiveType.DECIMAL32, Sets.newHashSet(BigDecimal.class)) .put(PrimitiveType.DECIMAL64, Sets.newHashSet(BigDecimal.class)) .put(PrimitiveType.DECIMAL128, Sets.newHashSet(BigDecimal.class)) + .put(PrimitiveType.ARRAY, Sets.newHashSet(ArrayList.class)) .build(); public static ArrayList<ScalarType> getIntegerTypes() { diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateFunctionStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateFunctionStmt.java index 1a9e90fb91..179b2f32ed 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateFunctionStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateFunctionStmt.java @@ -19,6 +19,7 @@ package org.apache.doris.analysis; import org.apache.doris.catalog.AggregateFunction; import org.apache.doris.catalog.AliasFunction; +import org.apache.doris.catalog.ArrayType; import org.apache.doris.catalog.Env; import org.apache.doris.catalog.Function; import org.apache.doris.catalog.Function.NullableMode; @@ -541,17 +542,23 @@ public class CreateFunctionStmt extends DdlStmt { private void checkUdfType(Class clazz, Method method, Type expType, Class pType, String pname) throws AnalysisException { - if (!(expType instanceof ScalarType)) { + Set<Class> javaTypes; + if (expType instanceof ScalarType) { + ScalarType scalarType = (ScalarType) expType; + javaTypes = Type.PrimitiveTypeToJavaClassType.get(scalarType.getPrimitiveType()); + } else if (expType instanceof ArrayType) { + ArrayType arrayType = (ArrayType) expType; + javaTypes = Type.PrimitiveTypeToJavaClassType.get(arrayType.getPrimitiveType()); + } else { throw new AnalysisException( - String.format("Method '%s' in class '%s' does not support non-scalar type '%s'", + String.format("Method '%s' in class '%s' does not support type '%s'", method.getName(), clazz.getCanonicalName(), expType)); } - ScalarType scalarType = (ScalarType) expType; - Set<Class> javaTypes = Type.PrimitiveTypeToJavaClassType.get(scalarType.getPrimitiveType()); + if (javaTypes == null) { throw new AnalysisException( String.format("Method '%s' in class '%s' does not support type '%s'", - method.getName(), clazz.getCanonicalName(), scalarType)); + method.getName(), clazz.getCanonicalName(), expType.toString())); } if (!javaTypes.contains(pType)) { throw new AnalysisException( diff --git a/fe/java-udf/src/main/java/org/apache/doris/udf/BaseExecutor.java b/fe/java-udf/src/main/java/org/apache/doris/udf/BaseExecutor.java index 9e481b521a..69a132315e 100644 --- a/fe/java-udf/src/main/java/org/apache/doris/udf/BaseExecutor.java +++ b/fe/java-udf/src/main/java/org/apache/doris/udf/BaseExecutor.java @@ -17,6 +17,7 @@ package org.apache.doris.udf; +import org.apache.doris.catalog.PrimitiveType; import org.apache.doris.catalog.Type; import org.apache.doris.thrift.TJavaUdfExecutorCtorParams; import org.apache.doris.udf.UdfUtils.JavaUdfDataType; @@ -32,6 +33,9 @@ import java.math.BigInteger; import java.math.RoundingMode; import java.net.URLClassLoader; import java.nio.charset.StandardCharsets; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.util.ArrayList; import java.util.Arrays; public abstract class BaseExecutor { @@ -48,33 +52,40 @@ public abstract class BaseExecutor { public static final String UDAF_RESULT_FUNCTION = "getValue"; // Object to deserialize ctor params from BE. - protected static final TBinaryProtocol.Factory PROTOCOL_FACTORY = - new TBinaryProtocol.Factory(); + protected static final TBinaryProtocol.Factory PROTOCOL_FACTORY = new TBinaryProtocol.Factory(); protected Object udf; // setup by init() and cleared by close() protected URLClassLoader classLoader; - // Return and argument types of the function inferred from the udf method signature. + // Return and argument types of the function inferred from the udf method + // signature. // The JavaUdfDataType enum maps it to corresponding primitive type. protected JavaUdfDataType[] argTypes; protected JavaUdfDataType retType; - // Input buffer from the backend. This is valid for the duration of an evaluate() call. + // Input buffer from the backend. This is valid for the duration of an + // evaluate() call. // These buffers are allocated in the BE. protected final long inputBufferPtrs; protected final long inputNullsPtrs; protected final long inputOffsetsPtrs; + protected final long inputArrayNullsPtrs; + protected final long inputArrayStringOffsetsPtrs; - // Output buffer to return non-string values. These buffers are allocated in the BE. + // Output buffer to return non-string values. These buffers are allocated in the + // BE. protected final long outputBufferPtr; protected final long outputNullPtr; protected final long outputOffsetsPtr; + protected final long outputArrayNullPtr; + protected final long outputArrayStringOffsetsPtr; protected final long outputIntermediateStatePtr; protected Class[] argClass; /** - * Create a UdfExecutor, using parameters from a serialized thrift object. Used by + * Create a UdfExecutor, using parameters from a serialized thrift object. Used + * by * the backend. */ public BaseExecutor(byte[] thriftParams) throws Exception { @@ -88,11 +99,14 @@ public abstract class BaseExecutor { inputBufferPtrs = request.input_buffer_ptrs; inputNullsPtrs = request.input_nulls_ptrs; inputOffsetsPtrs = request.input_offsets_ptrs; - + inputArrayNullsPtrs = request.input_array_nulls_buffer_ptr; + inputArrayStringOffsetsPtrs = request.input_array_string_offsets_ptrs; outputBufferPtr = request.output_buffer_ptr; outputNullPtr = request.output_null_ptr; outputOffsetsPtr = request.output_offsets_ptr; outputIntermediateStatePtr = request.output_intermediate_state_ptr; + outputArrayNullPtr = request.output_array_null_ptr; + outputArrayStringOffsetsPtr = request.output_array_string_offsets_ptr; Type[] parameterTypes = new Type[request.fn.arg_types.size()]; for (int i = 0; i < request.fn.arg_types.size(); ++i) { @@ -206,18 +220,24 @@ public abstract class BaseExecutor { case STRING: { long offset = Integer.toUnsignedLong(UdfUtils.UNSAFE.getInt(null, UdfUtils.UNSAFE.getLong(null, UdfUtils.getAddressAtOffset(inputOffsetsPtrs, i)) + 4L * row)); - long numBytes = row == 0 ? offset : offset - Integer.toUnsignedLong(UdfUtils.UNSAFE.getInt(null, - UdfUtils.UNSAFE.getLong(null, - UdfUtils.getAddressAtOffset(inputOffsetsPtrs, i)) + 4L * (row - 1))); - long base = - row == 0 ? UdfUtils.UNSAFE.getLong(null, UdfUtils.getAddressAtOffset(inputBufferPtrs, i)) : - UdfUtils.UNSAFE.getLong(null, UdfUtils.getAddressAtOffset(inputBufferPtrs, i)) - + offset - numBytes; + long numBytes = row == 0 ? offset + : offset - Integer.toUnsignedLong(UdfUtils.UNSAFE.getInt(null, + UdfUtils.UNSAFE.getLong(null, + UdfUtils.getAddressAtOffset(inputOffsetsPtrs, i)) + 4L * (row - 1))); + long base = row == 0 + ? UdfUtils.UNSAFE.getLong(null, UdfUtils.getAddressAtOffset(inputBufferPtrs, i)) + : UdfUtils.UNSAFE.getLong(null, UdfUtils.getAddressAtOffset(inputBufferPtrs, i)) + + offset - numBytes; byte[] bytes = new byte[(int) numBytes]; UdfUtils.copyMemory(null, base, bytes, UdfUtils.BYTE_ARRAY_OFFSET, numBytes); inputObjects[i] = new String(bytes, StandardCharsets.UTF_8); break; } + case ARRAY_TYPE: { + Type type = argTypes[i].getItemType(); + inputObjects[i] = arrayTypeInputData(type, i, row); + break; + } default: throw new UdfRuntimeException("Unsupported argument type: " + argTypes[i]); } @@ -225,6 +245,233 @@ public abstract class BaseExecutor { return inputObjects; } + public ArrayList<?> arrayTypeInputData(Type type, int argIdx, long row) + throws UdfRuntimeException { + long offsetStart = (row == 0) ? 0 + : Integer.toUnsignedLong(UdfUtils.UNSAFE.getInt(null, UdfUtils.UNSAFE.getLong(null, + UdfUtils.getAddressAtOffset(inputOffsetsPtrs, argIdx)) + 8L * (row - 1))); + long offsetEnd = Integer.toUnsignedLong(UdfUtils.UNSAFE.getInt(null, UdfUtils.UNSAFE.getLong(null, + UdfUtils.getAddressAtOffset(inputOffsetsPtrs, argIdx)) + 8L * row)); + long arrayNullMapBase = UdfUtils.UNSAFE.getLong(null, UdfUtils.getAddressAtOffset(inputArrayNullsPtrs, argIdx)); + long arrayInputBufferBase = UdfUtils.UNSAFE.getLong(null, UdfUtils.getAddressAtOffset(inputBufferPtrs, argIdx)); + + switch (type.getPrimitiveType()) { + case BOOLEAN: { + ArrayList<Boolean> data = new ArrayList<>(); + for (long offsetRow = offsetStart; offsetRow < offsetEnd; ++offsetRow) { + if ((UdfUtils.UNSAFE.getByte(null, arrayNullMapBase + offsetRow) == 1)) { + data.add(null); + } else { + boolean value = UdfUtils.UNSAFE.getBoolean(null, arrayInputBufferBase + offsetRow); + data.add(value); + } + } + return data; + } + case TINYINT: { + ArrayList<Byte> data = new ArrayList<>(); + for (long offsetRow = offsetStart; offsetRow < offsetEnd; ++offsetRow) { + if ((UdfUtils.UNSAFE.getByte(null, arrayNullMapBase + offsetRow) == 1)) { + data.add(null); + } else { + byte value = UdfUtils.UNSAFE.getByte(null, arrayInputBufferBase + offsetRow); + data.add(value); + } + } + return data; + } + case SMALLINT: { + ArrayList<Short> data = new ArrayList<>(); + for (long offsetRow = offsetStart; offsetRow < offsetEnd; ++offsetRow) { + if ((UdfUtils.UNSAFE.getByte(null, arrayNullMapBase + offsetRow) == 1)) { + data.add(null); + } else { + short value = UdfUtils.UNSAFE.getShort(null, arrayInputBufferBase + 2L * offsetRow); + data.add(value); + } + } + return data; + } + case INT: { + ArrayList<Integer> data = new ArrayList<>(); + for (long offsetRow = offsetStart; offsetRow < offsetEnd; ++offsetRow) { + if ((UdfUtils.UNSAFE.getByte(null, arrayNullMapBase + offsetRow) == 1)) { + data.add(null); + } else { + int value = UdfUtils.UNSAFE.getInt(null, arrayInputBufferBase + 4L * offsetRow); + data.add(value); + } + } + return data; + } + case BIGINT: { + ArrayList<Long> data = new ArrayList<>(); + for (long offsetRow = offsetStart; offsetRow < offsetEnd; ++offsetRow) { + if ((UdfUtils.UNSAFE.getByte(null, arrayNullMapBase + offsetRow) == 1)) { + data.add(null); + } else { + long value = UdfUtils.UNSAFE.getLong(null, arrayInputBufferBase + 8L * offsetRow); + data.add(value); + } + } + return data; + } + case FLOAT: { + ArrayList<Float> data = new ArrayList<>(); + for (long offsetRow = offsetStart; offsetRow < offsetEnd; ++offsetRow) { + if ((UdfUtils.UNSAFE.getByte(null, arrayNullMapBase + offsetRow) == 1)) { + data.add(null); + } else { + float value = UdfUtils.UNSAFE.getFloat(null, arrayInputBufferBase + 4L * offsetRow); + data.add(value); + } + } + return data; + } + case DOUBLE: { + ArrayList<Double> data = new ArrayList<>(); + for (long offsetRow = offsetStart; offsetRow < offsetEnd; ++offsetRow) { + if ((UdfUtils.UNSAFE.getByte(null, arrayNullMapBase + offsetRow) == 1)) { + data.add(null); + } else { + double value = UdfUtils.UNSAFE.getDouble(null, arrayInputBufferBase + 8L * offsetRow); + data.add(value); + } + } + return data; + } + case DATE: { + ArrayList<LocalDate> data = new ArrayList<>(); + for (long offsetRow = offsetStart; offsetRow < offsetEnd; ++offsetRow) { + if ((UdfUtils.UNSAFE.getByte(null, arrayNullMapBase + offsetRow) == 1)) { + data.add(null); + } else { + long value = UdfUtils.UNSAFE.getLong(null, arrayInputBufferBase + 8L * offsetRow); + // TODO: now argClass[argIdx + argClassOffset] is java.util.ArrayList, can't get + // nested class type + // LocalDate obj = UdfUtils.convertDateToJavaDate(value, argClass[argIdx + + // argClassOffset]); + LocalDate obj = (LocalDate) UdfUtils.convertDateToJavaDate(value, LocalDate.class); + data.add(obj); + } + } + return data; + } + case DATETIME: { + ArrayList<LocalDateTime> data = new ArrayList<>(); + for (long offsetRow = offsetStart; offsetRow < offsetEnd; ++offsetRow) { + if ((UdfUtils.UNSAFE.getByte(null, arrayNullMapBase + offsetRow) == 1)) { + data.add(null); + } else { + long value = UdfUtils.UNSAFE.getLong(null, arrayInputBufferBase + 8L * offsetRow); + // Object obj = UdfUtils.convertDateTimeToJavaDateTime(value, argClass[argIdx + + // argClassOffset]); + LocalDateTime obj = (LocalDateTime) UdfUtils.convertDateTimeToJavaDateTime(value, + LocalDateTime.class); + data.add(obj); + } + } + return data; + } + case DATEV2: { + ArrayList<LocalDate> data = new ArrayList<>(); + for (long offsetRow = offsetStart; offsetRow < offsetEnd; ++offsetRow) { + if ((UdfUtils.UNSAFE.getByte(null, arrayNullMapBase + offsetRow) == 1)) { + data.add(null); + } else { + int value = UdfUtils.UNSAFE.getInt(null, arrayInputBufferBase + 4L * offsetRow); + // Object obj = UdfUtils.convertDateV2ToJavaDate(value, argClass[argIdx + + // argClassOffset]); + LocalDate obj = (LocalDate) UdfUtils.convertDateV2ToJavaDate(value, LocalDate.class); + data.add(obj); + } + } + return data; + } + case DATETIMEV2: { + ArrayList<LocalDateTime> data = new ArrayList<>(); + for (long offsetRow = offsetStart; offsetRow < offsetEnd; ++offsetRow) { + if ((UdfUtils.UNSAFE.getByte(null, arrayNullMapBase + offsetRow) == 1)) { + data.add(null); + } else { + long value = UdfUtils.UNSAFE.getLong(null, arrayInputBufferBase + 8L * offsetRow); + LocalDateTime obj = (LocalDateTime) UdfUtils.convertDateTimeV2ToJavaDateTime(value, + LocalDateTime.class); + data.add(obj); + } + } + return data; + } + case LARGEINT: { + ArrayList<BigInteger> data = new ArrayList<>(); + byte[] bytes = new byte[16]; + for (long offsetRow = offsetStart; offsetRow < offsetEnd; ++offsetRow) { + if ((UdfUtils.UNSAFE.getByte(null, arrayNullMapBase + offsetRow) == 1)) { + data.add(null); + } else { + long value = UdfUtils.UNSAFE.getLong(null, arrayInputBufferBase + 16L * offsetRow); + UdfUtils.copyMemory(null, value, bytes, UdfUtils.BYTE_ARRAY_OFFSET, 16); + data.add(new BigInteger(UdfUtils.convertByteOrder(bytes))); + } + } + return data; + } + case DECIMALV2: + case DECIMAL32: + case DECIMAL64: + case DECIMAL128: { + int len; + if (type.getPrimitiveType() == PrimitiveType.DECIMAL32) { + len = 4; + } else if (type.getPrimitiveType() == PrimitiveType.DECIMAL64) { + len = 8; + } else { + len = 16; + } + ArrayList<BigDecimal> data = new ArrayList<>(); + byte[] bytes = new byte[len]; + for (long offsetRow = offsetStart; offsetRow < offsetEnd; ++offsetRow) { + if ((UdfUtils.UNSAFE.getByte(null, arrayNullMapBase + offsetRow) == 1)) { + data.add(null); + } else { + long value = UdfUtils.UNSAFE.getLong(null, arrayInputBufferBase + len * offsetRow); + UdfUtils.copyMemory(null, value, bytes, UdfUtils.BYTE_ARRAY_OFFSET, len); + BigInteger bigInteger = new BigInteger(UdfUtils.convertByteOrder(bytes)); + data.add(new BigDecimal(bigInteger, argTypes[argIdx].getScale())); + } + } + return data; + } + case CHAR: + case VARCHAR: + case STRING: { + ArrayList<String> data = new ArrayList<>(); + long strOffsetBase = UdfUtils.UNSAFE + .getLong(null, UdfUtils.getAddressAtOffset(inputArrayStringOffsetsPtrs, argIdx)); + for (long offsetRow = offsetStart; offsetRow < offsetEnd; ++offsetRow) { + if ((UdfUtils.UNSAFE.getByte(null, arrayNullMapBase + offsetRow) == 1)) { + data.add(null); + } else { + long stringOffsetStart = (offsetRow == 0) ? 0 + : Integer.toUnsignedLong( + UdfUtils.UNSAFE.getInt(null, strOffsetBase + 4L * (offsetRow - 1))); + long stringOffsetEnd = Integer + .toUnsignedLong(UdfUtils.UNSAFE.getInt(null, strOffsetBase + 4L * offsetRow)); + + long numBytes = stringOffsetEnd - stringOffsetStart; + long base = arrayInputBufferBase + stringOffsetStart; + byte[] bytes = new byte[(int) numBytes]; + UdfUtils.copyMemory(null, base, bytes, UdfUtils.BYTE_ARRAY_OFFSET, numBytes); + data.add(new String(bytes, StandardCharsets.UTF_8)); + } + } + return data; + } + default: + throw new UdfRuntimeException("Unsupported argument type in nested array: " + type); + } + } + protected abstract long getCurrentOutputOffset(long row); /** @@ -380,11 +627,394 @@ public abstract class BaseExecutor { updateOutputOffset(offset); return true; } + case ARRAY_TYPE: { + Type type = retType.getItemType(); + return arrayTypeOutputData(obj, type, row); + } default: throw new UdfRuntimeException("Unsupported return type: " + retType); } } + public boolean arrayTypeOutputData(Object obj, Type type, long row) throws UdfRuntimeException { + long offset = getCurrentOutputOffset(row); + long bufferSize = UdfUtils.UNSAFE.getLong(null, outputIntermediateStatePtr); + long outputNullMapBase = UdfUtils.UNSAFE.getLong(null, outputArrayNullPtr); + long outputBufferBase = UdfUtils.UNSAFE.getLong(null, outputBufferPtr); + switch (type.getPrimitiveType()) { + case BOOLEAN: { + ArrayList<Boolean> data = (ArrayList<Boolean>) obj; + int num = data.size(); + if (offset + num > bufferSize) { + return false; + } + for (int i = 0; i < num; ++i) { + Boolean value = data.get(i); + if (value == null) { + UdfUtils.UNSAFE.putByte(outputNullMapBase + (offset + i), (byte) 1); + } else { + UdfUtils.UNSAFE.putByte(outputNullMapBase + (offset + i), (byte) 0); + UdfUtils.UNSAFE.putByte(outputBufferBase + (offset + i), value ? (byte) 1 : 0); + } + } + offset += num; + UdfUtils.UNSAFE.putLong(null, UdfUtils.UNSAFE.getLong(null, outputOffsetsPtr) + 8L * row, + Long.parseUnsignedLong(String.valueOf(offset))); + updateOutputOffset(offset); + return true; + } + case TINYINT: { + ArrayList<Byte> data = (ArrayList<Byte>) obj; + int num = data.size(); + if (offset + num > bufferSize) { + return false; + } + for (int i = 0; i < num; ++i) { + Byte value = data.get(i); + if (value == null) { + UdfUtils.UNSAFE.putByte(outputNullMapBase + (offset + i), (byte) 1); + } else { + UdfUtils.UNSAFE.putByte(outputNullMapBase + (offset + i), (byte) 0); + UdfUtils.UNSAFE.putByte(outputBufferBase + (offset + i), value); + } + } + offset += num; + UdfUtils.UNSAFE.putLong(null, UdfUtils.UNSAFE.getLong(null, outputOffsetsPtr) + 8L * row, + Long.parseUnsignedLong(String.valueOf(offset))); + updateOutputOffset(offset); + return true; + } + case SMALLINT: { + ArrayList<Short> data = (ArrayList<Short>) obj; + int num = data.size(); + if (offset + num > bufferSize) { + return false; + } + for (int i = 0; i < num; ++i) { + Short value = data.get(i); + if (value == null) { + UdfUtils.UNSAFE.putByte(outputNullMapBase + (offset + i), (byte) 1); + } else { + UdfUtils.UNSAFE.putByte(outputNullMapBase + (offset + i), (byte) 0); + UdfUtils.UNSAFE.putShort(outputBufferBase + ((offset + i) * 2L), value); + } + } + offset += num; + UdfUtils.UNSAFE.putLong(null, UdfUtils.UNSAFE.getLong(null, outputOffsetsPtr) + 8L * row, + Long.parseUnsignedLong(String.valueOf(offset))); + updateOutputOffset(offset); + return true; + } + case INT: { + ArrayList<Integer> data = (ArrayList<Integer>) obj; + int num = data.size(); + if (offset + num > bufferSize) { + return false; + } + for (int i = 0; i < num; ++i) { + Integer value = data.get(i); + if (value == null) { + UdfUtils.UNSAFE.putByte(outputNullMapBase + (offset + i), (byte) 1); + } else { + UdfUtils.UNSAFE.putByte(outputNullMapBase + (offset + i), (byte) 0); + UdfUtils.UNSAFE.putInt(outputBufferBase + ((offset + i) * 4L), value); + } + } + offset += num; + UdfUtils.UNSAFE.putLong(null, UdfUtils.UNSAFE.getLong(null, outputOffsetsPtr) + 8L * row, + Long.parseUnsignedLong(String.valueOf(offset))); + updateOutputOffset(offset); + return true; + } + case BIGINT: { + ArrayList<Long> data = (ArrayList<Long>) obj; + int num = data.size(); + if (offset + num > bufferSize) { + return false; + } + for (int i = 0; i < num; ++i) { + Long value = data.get(i); + if (value == null) { + UdfUtils.UNSAFE.putByte(outputNullMapBase + (offset + i), (byte) 1); + } else { + UdfUtils.UNSAFE.putByte(outputNullMapBase + (offset + i), (byte) 0); + UdfUtils.UNSAFE.putLong(outputBufferBase + ((offset + i) * 8L), value); + } + } + offset += num; + UdfUtils.UNSAFE.putLong(null, UdfUtils.UNSAFE.getLong(null, outputOffsetsPtr) + 8L * row, + Long.parseUnsignedLong(String.valueOf(offset))); + updateOutputOffset(offset); + return true; + } + case FLOAT: { + ArrayList<Float> data = (ArrayList<Float>) obj; + int num = data.size(); + if (offset + num > bufferSize) { + return false; + } + for (int i = 0; i < num; ++i) { + Float value = data.get(i); + if (value == null) { + UdfUtils.UNSAFE.putByte(outputNullMapBase + (offset + i), (byte) 1); + } else { + UdfUtils.UNSAFE.putByte(outputNullMapBase + (offset + i), (byte) 0); + UdfUtils.UNSAFE.putFloat(outputBufferBase + ((offset + i) * 4L), value); + } + } + offset += num; + UdfUtils.UNSAFE.putLong(null, UdfUtils.UNSAFE.getLong(null, outputOffsetsPtr) + 8L * row, + Long.parseUnsignedLong(String.valueOf(offset))); + updateOutputOffset(offset); + return true; + } + case DOUBLE: { + ArrayList<Double> data = (ArrayList<Double>) obj; + int num = data.size(); + if (offset + num > bufferSize) { + return false; + } + for (int i = 0; i < num; ++i) { + Double value = data.get(i); + if (value == null) { + UdfUtils.UNSAFE.putByte(outputNullMapBase + (offset + i), (byte) 1); + } else { + UdfUtils.UNSAFE.putByte(outputNullMapBase + (offset + i), (byte) 0); + UdfUtils.UNSAFE.putDouble(outputBufferBase + ((offset + i) * 8L), value); + } + } + offset += num; + UdfUtils.UNSAFE.putLong(null, UdfUtils.UNSAFE.getLong(null, outputOffsetsPtr) + 8L * row, + Long.parseUnsignedLong(String.valueOf(offset))); + updateOutputOffset(offset); + return true; + } + case DATE: { + ArrayList<LocalDate> data = (ArrayList<LocalDate>) obj; + int num = data.size(); + if (offset + num > bufferSize) { + return false; + } + for (int i = 0; i < num; ++i) { + LocalDate value = data.get(i); + if (value == null) { + UdfUtils.UNSAFE.putByte(outputNullMapBase + (offset + i), (byte) 1); + } else { + UdfUtils.UNSAFE.putByte(outputNullMapBase + (offset + i), (byte) 0); + long time = UdfUtils.convertToDate(value, LocalDate.class); + UdfUtils.UNSAFE.putLong(outputBufferBase + ((offset + i) * 8L), time); + } + } + offset += num; + UdfUtils.UNSAFE.putLong(null, UdfUtils.UNSAFE.getLong(null, outputOffsetsPtr) + 8L * row, + Long.parseUnsignedLong(String.valueOf(offset))); + updateOutputOffset(offset); + return true; + } + case DATETIME: { + ArrayList<LocalDateTime> data = (ArrayList<LocalDateTime>) obj; + int num = data.size(); + if (offset + num > bufferSize) { + return false; + } + for (int i = 0; i < num; ++i) { + LocalDateTime value = data.get(i); + if (value == null) { + UdfUtils.UNSAFE.putByte(outputNullMapBase + (offset + i), (byte) 1); + } else { + UdfUtils.UNSAFE.putByte(outputNullMapBase + (offset + i), (byte) 0); + long time = UdfUtils.convertToDateTime(value, LocalDateTime.class); + UdfUtils.UNSAFE.putLong(outputBufferBase + ((offset + i) * 8L), time); + } + } + offset += num; + UdfUtils.UNSAFE.putLong(null, UdfUtils.UNSAFE.getLong(null, outputOffsetsPtr) + 8L * row, + Long.parseUnsignedLong(String.valueOf(offset))); + updateOutputOffset(offset); + return true; + } + case DATEV2: { + ArrayList<LocalDate> data = (ArrayList<LocalDate>) obj; + int num = data.size(); + if (offset + num > bufferSize) { + return false; + } + for (int i = 0; i < num; ++i) { + LocalDate value = data.get(i); + if (value == null) { + UdfUtils.UNSAFE.putByte(outputNullMapBase + (offset + i), (byte) 1); + } else { + UdfUtils.UNSAFE.putByte(outputNullMapBase + (offset + i), (byte) 0); + int time = UdfUtils.convertToDateV2(value, LocalDate.class); + UdfUtils.UNSAFE.putInt(outputBufferBase + ((offset + i) * 4L), time); + } + } + offset += num; + UdfUtils.UNSAFE.putLong(null, UdfUtils.UNSAFE.getLong(null, outputOffsetsPtr) + 8L * row, + Long.parseUnsignedLong(String.valueOf(offset))); + updateOutputOffset(offset); + return true; + } + case DATETIMEV2: { + ArrayList<LocalDateTime> data = (ArrayList<LocalDateTime>) obj; + int num = data.size(); + if (offset + num > bufferSize) { + return false; + } + for (int i = 0; i < num; ++i) { + LocalDateTime value = data.get(i); + if (value == null) { + UdfUtils.UNSAFE.putByte(outputNullMapBase + (offset + i), (byte) 1); + } else { + UdfUtils.UNSAFE.putByte(outputNullMapBase + (offset + i), (byte) 0); + long time = UdfUtils.convertToDateTimeV2(value, LocalDateTime.class); + UdfUtils.UNSAFE.putLong(outputBufferBase + ((offset + i) * 8L), time); + } + } + offset += num; + UdfUtils.UNSAFE.putLong(null, UdfUtils.UNSAFE.getLong(null, outputOffsetsPtr) + 8L * row, + Long.parseUnsignedLong(String.valueOf(offset))); + updateOutputOffset(offset); + return true; + } + case LARGEINT: { + ArrayList<BigInteger> data = (ArrayList<BigInteger>) obj; + int num = data.size(); + if (offset + num > bufferSize) { + return false; + } + for (int i = 0; i < num; ++i) { + BigInteger bigInteger = data.get(i); + if (bigInteger == null) { + UdfUtils.UNSAFE.putByte(outputNullMapBase + (offset + i), (byte) 1); + } else { + UdfUtils.UNSAFE.putByte(outputNullMapBase + (offset + i), (byte) 0); + byte[] bytes = UdfUtils.convertByteOrder(bigInteger.toByteArray()); + byte[] value = new byte[16]; + // check data is negative + if (bigInteger.signum() == -1) { + Arrays.fill(value, (byte) -1); + } + for (int index = 0; index < Math.min(bytes.length, value.length); ++index) { + value[index] = bytes[index]; + } + UdfUtils.copyMemory(value, UdfUtils.BYTE_ARRAY_OFFSET, null, + outputBufferBase + ((offset + i) * 16L), value.length); + } + } + offset += num; + UdfUtils.UNSAFE.putLong(null, UdfUtils.UNSAFE.getLong(null, outputOffsetsPtr) + 8L * row, + Long.parseUnsignedLong(String.valueOf(offset))); + updateOutputOffset(offset); + return true; + } + case DECIMALV2: { + ArrayList<BigDecimal> data = (ArrayList<BigDecimal>) obj; + int num = data.size(); + if (offset + num > bufferSize) { + return false; + } + for (int i = 0; i < num; ++i) { + BigDecimal bigDecimal = data.get(i); + if (bigDecimal == null) { + UdfUtils.UNSAFE.putByte(outputNullMapBase + (offset + i), (byte) 1); + } else { + BigInteger bigInteger = bigDecimal.setScale(9, RoundingMode.HALF_EVEN).unscaledValue(); + UdfUtils.UNSAFE.putByte(outputNullMapBase + (offset + i), (byte) 0); + byte[] bytes = UdfUtils.convertByteOrder(bigInteger.toByteArray()); + byte[] value = new byte[16]; + // check data is negative + if (bigInteger.signum() == -1) { + Arrays.fill(value, (byte) -1); + } + for (int index = 0; index < Math.min(bytes.length, value.length); ++index) { + value[index] = bytes[index]; + } + UdfUtils.copyMemory(value, UdfUtils.BYTE_ARRAY_OFFSET, null, + outputBufferBase + ((offset + i) * 16L), value.length); + } + } + offset += num; + UdfUtils.UNSAFE.putLong(null, UdfUtils.UNSAFE.getLong(null, outputOffsetsPtr) + 8L * row, + Long.parseUnsignedLong(String.valueOf(offset))); + updateOutputOffset(offset); + return true; + } + case DECIMAL32: + case DECIMAL64: + case DECIMAL128: { + ArrayList<BigDecimal> data = (ArrayList<BigDecimal>) obj; + int num = data.size(); + if (offset + num > bufferSize) { + return false; + } + for (int i = 0; i < num; ++i) { + BigDecimal bigDecimal = data.get(i); + if (bigDecimal == null) { + UdfUtils.UNSAFE.putByte(outputNullMapBase + (offset + i), (byte) 1); + } else { + BigInteger bigInteger = bigDecimal.setScale(retType.getScale(), RoundingMode.HALF_EVEN) + .unscaledValue(); + UdfUtils.UNSAFE.putByte(outputNullMapBase + (offset + i), (byte) 0); + byte[] bytes = UdfUtils.convertByteOrder(bigInteger.toByteArray()); + byte[] value = new byte[16]; + // check data is negative + if (bigInteger.signum() == -1) { + Arrays.fill(value, (byte) -1); + } + for (int index = 0; index < Math.min(bytes.length, value.length); ++index) { + value[index] = bytes[index]; + } + UdfUtils.copyMemory(value, UdfUtils.BYTE_ARRAY_OFFSET, null, + outputBufferBase + ((offset + i) * 16L), value.length); + } + } + offset += num; + UdfUtils.UNSAFE.putLong(null, UdfUtils.UNSAFE.getLong(null, outputOffsetsPtr) + 8L * row, + Long.parseUnsignedLong(String.valueOf(offset))); + updateOutputOffset(offset); + return true; + } + case CHAR: + case VARCHAR: + case STRING: { + ArrayList<String> data = (ArrayList<String>) obj; + int num = data.size(); + if (offset + num > bufferSize) { + return false; + } + long outputStrOffsetBase = UdfUtils.UNSAFE.getLong(null, outputArrayStringOffsetsPtr); + for (int i = 0; i < num; ++i) { + String value = data.get(i); + if (value == null) { + UdfUtils.UNSAFE.putByte(outputNullMapBase + (offset + i), (byte) 1); + } else { + byte[] bytes = value.getBytes(StandardCharsets.UTF_8); + long strOffset = (offset + i == 0) ? 0 + : Integer.toUnsignedLong(UdfUtils.UNSAFE.getInt(null, + outputStrOffsetBase + ((offset + i - 1) * 4L))); + if (strOffset + bytes.length > bufferSize) { + return false; + } + UdfUtils.UNSAFE.putByte(outputNullMapBase + (offset + i), (byte) 0); + strOffset += bytes.length; + UdfUtils.UNSAFE.putInt(null, outputStrOffsetBase + 4L * (offset + i), + Integer.parseUnsignedInt(String.valueOf(strOffset))); + UdfUtils.copyMemory(bytes, UdfUtils.BYTE_ARRAY_OFFSET, null, + outputBufferBase + strOffset - bytes.length, bytes.length); + } + } + offset += num; + UdfUtils.UNSAFE.putLong(null, UdfUtils.UNSAFE.getLong(null, outputOffsetsPtr) + 8L * row, + Long.parseUnsignedLong(String.valueOf(offset))); + updateOutputOffset(offset); + return true; + } + default: + throw new UdfRuntimeException("Unsupported argument type in nested array: " + type); + } + } + protected void updateOutputOffset(long offset) { } } diff --git a/fe/java-udf/src/main/java/org/apache/doris/udf/UdfExecutor.java b/fe/java-udf/src/main/java/org/apache/doris/udf/UdfExecutor.java index 5f043f64a8..5cc295dd8e 100644 --- a/fe/java-udf/src/main/java/org/apache/doris/udf/UdfExecutor.java +++ b/fe/java-udf/src/main/java/org/apache/doris/udf/UdfExecutor.java @@ -72,7 +72,7 @@ public class UdfExecutor extends BaseExecutor { int batchSize = UdfUtils.UNSAFE.getInt(null, batchSizePtr); try { if (retType.equals(JavaUdfDataType.STRING) || retType.equals(JavaUdfDataType.VARCHAR) - || retType.equals(JavaUdfDataType.CHAR)) { + || retType.equals(JavaUdfDataType.CHAR) || retType.equals(JavaUdfDataType.ARRAY_TYPE)) { // If this udf return variable-size type (e.g.) String, we have to allocate output // buffer multiple times until buffer size is enough to store output column. So we // always begin with the last evaluated row instead of beginning of this batch. @@ -95,12 +95,12 @@ public class UdfExecutor extends BaseExecutor { } } } catch (Exception e) { - if (retType.equals(JavaUdfDataType.STRING)) { + if (retType.equals(JavaUdfDataType.STRING) || retType.equals(JavaUdfDataType.ARRAY_TYPE)) { UdfUtils.UNSAFE.putLong(null, outputIntermediateStatePtr + 8, batchSize); } throw new UdfRuntimeException("UDF::evaluate() ran into a problem.", e); } - if (retType.equals(JavaUdfDataType.STRING)) { + if (retType.equals(JavaUdfDataType.STRING) || retType.equals(JavaUdfDataType.ARRAY_TYPE)) { UdfUtils.UNSAFE.putLong(null, outputIntermediateStatePtr + 8, rowIdx); } } @@ -131,6 +131,9 @@ public class UdfExecutor extends BaseExecutor { if (retType.equals(JavaUdfDataType.STRING)) { UdfUtils.UNSAFE.putInt(null, UdfUtils.UNSAFE.getLong(null, outputOffsetsPtr) + 4L * row, Integer.parseUnsignedInt(String.valueOf(outputOffset))); + } else if (retType.equals(JavaUdfDataType.ARRAY_TYPE)) { + UdfUtils.UNSAFE.putLong(null, UdfUtils.UNSAFE.getLong(null, outputOffsetsPtr) + 8L * row, + Long.parseUnsignedLong(String.valueOf(outputOffset))); } return true; } diff --git a/fe/java-udf/src/main/java/org/apache/doris/udf/UdfUtils.java b/fe/java-udf/src/main/java/org/apache/doris/udf/UdfUtils.java index 4746350cff..9df2174636 100644 --- a/fe/java-udf/src/main/java/org/apache/doris/udf/UdfUtils.java +++ b/fe/java-udf/src/main/java/org/apache/doris/udf/UdfUtils.java @@ -17,6 +17,7 @@ package org.apache.doris.udf; +import org.apache.doris.catalog.ArrayType; import org.apache.doris.catalog.PrimitiveType; import org.apache.doris.catalog.ScalarType; import org.apache.doris.catalog.Type; @@ -89,13 +90,15 @@ public class UdfUtils { DATETIMEV2("DATETIMEV2", TPrimitiveType.DATETIMEV2, 8), DECIMAL32("DECIMAL32", TPrimitiveType.DECIMAL32, 4), DECIMAL64("DECIMAL64", TPrimitiveType.DECIMAL64, 8), - DECIMAL128("DECIMAL128", TPrimitiveType.DECIMAL128I, 16); + DECIMAL128("DECIMAL128", TPrimitiveType.DECIMAL128I, 16), + ARRAY_TYPE("ARRAY_TYPE", TPrimitiveType.ARRAY, 0); private final String description; private final TPrimitiveType thriftType; private final int len; private int precision; private int scale; + private Type itemType; JavaUdfDataType(String description, TPrimitiveType thriftType, int len) { this.description = description; @@ -144,6 +147,8 @@ public class UdfUtils { } else if (c == BigDecimal.class) { return Sets.newHashSet(JavaUdfDataType.DECIMALV2, JavaUdfDataType.DECIMAL32, JavaUdfDataType.DECIMAL64, JavaUdfDataType.DECIMAL128); + } else if (c == java.util.ArrayList.class) { + return Sets.newHashSet(JavaUdfDataType.ARRAY_TYPE); } return Sets.newHashSet(JavaUdfDataType.INVALID_TYPE); } @@ -175,6 +180,14 @@ public class UdfUtils { public void setScale(int scale) { this.scale = scale; } + + public Type getItemType() { + return itemType; + } + + public void setItemType(Type type) { + this.itemType = type; + } } protected static Pair<Type, Integer> fromThrift(TTypeDesc typeDesc, int nodeIdx) throws InternalException { @@ -201,6 +214,14 @@ public class UdfUtils { } break; } + case ARRAY: { + Preconditions.checkState(nodeIdx + 1 < typeDesc.getTypesSize()); + Pair<Type, Integer> childType = fromThrift(typeDesc, nodeIdx + 1); + type = new ArrayType(childType.first); + nodeIdx = childType.second; + break; + } + default: throw new InternalException("Return type " + node.getType() + " is not supported now!"); } @@ -238,7 +259,7 @@ public class UdfUtils { } public static URLClassLoader getClassLoader(String jarPath, ClassLoader parent) - throws MalformedURLException, FileNotFoundException { + throws MalformedURLException, FileNotFoundException { File file = new File(jarPath); if (!file.exists()) { throw new FileNotFoundException("Can not find local file: " + jarPath); @@ -268,6 +289,13 @@ public class UdfUtils { if (retType.isDecimalV3() || retType.isDatetimeV2()) { result.setPrecision(retType.getPrecision()); result.setScale(((ScalarType) retType).getScalarScale()); + } else if (retType.isArrayType()) { + ArrayType arrType = (ArrayType) retType; + result.setItemType(arrType.getItemType()); + if (arrType.getItemType().isDatetimeV2() || arrType.getItemType().isDecimalV3()) { + result.setPrecision(arrType.getItemType().getPrecision()); + result.setScale(((ScalarType) arrType.getItemType()).getScalarScale()); + } } return Pair.of(res.length != 0, result); } @@ -290,6 +318,9 @@ public class UdfUtils { if (parameterTypes[finalI].isDecimalV3() || parameterTypes[finalI].isDatetimeV2()) { inputArgTypes[i].setPrecision(parameterTypes[finalI].getPrecision()); inputArgTypes[i].setScale(((ScalarType) parameterTypes[finalI]).getScalarScale()); + } else if (parameterTypes[finalI].isArrayType()) { + ArrayType arrType = (ArrayType) parameterTypes[finalI]; + inputArgTypes[i].setItemType(arrType.getItemType()); } if (res.length == 0) { return Pair.of(false, inputArgTypes); diff --git a/gensrc/thrift/Types.thrift b/gensrc/thrift/Types.thrift index d1c9304eb7..5407952daa 100644 --- a/gensrc/thrift/Types.thrift +++ b/gensrc/thrift/Types.thrift @@ -426,6 +426,18 @@ struct TJavaUdfExecutorCtorParams { // this is used to pass place or places to FE, which could help us call jni // only once and can process a batch size data in JAVA-Udaf 11: optional i64 input_places_ptr + + // for array type about nested column null map + 12: optional i64 input_array_nulls_buffer_ptr + + // used for array type of nested string column offset + 13: optional i64 input_array_string_offsets_ptrs + + // for array type about nested column null map when output + 14: optional i64 output_array_null_ptr + + // used for array type of nested string column offset when output + 15: optional i64 output_array_string_offsets_ptr } // Contains all interesting statistics from a single 'memory pool' in the JVM. diff --git a/regression-test/data/javaudf_p0/test_javaudf_array.out b/regression-test/data/javaudf_p0/test_javaudf_array.out new file mode 100644 index 0000000000..ad5bda0b92 --- /dev/null +++ b/regression-test/data/javaudf_p0/test_javaudf_array.out @@ -0,0 +1,133 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !select_default -- +1 2 2022-01-01 2022-01-01T11:11:11 a1b +2 4 2022-01-01 2022-01-01T11:11:11 a2b +3 6 2022-01-01 2022-01-01T11:11:11 a3b +4 8 2022-01-01 2022-01-01T11:11:11 a4b +5 10 2022-01-01 2022-01-01T11:11:11 a5b +6 12 2022-01-01 2022-01-01T11:11:11 a6b +7 14 2022-01-01 2022-01-01T11:11:11 a7b +8 16 2022-01-01 2022-01-01T11:11:11 a8b +9 18 2022-01-01 2022-01-01T11:11:11 a9b +10 20 2022-06-06 2022-01-01T12:12:12 a10b + +-- !select_1 -- +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 + +-- !select_2 -- +\N + +-- !select_3 -- +[1] 2 +[2] 4 +[3] 6 +[4] 8 +[5] 10 +[6] 12 +[7] 14 +[8] 16 +[9] 18 +[10] 20 + +-- !select_4 -- +[2] 2 +[4] 4 +[6] 6 +[8] 8 +[10] 10 +[12] 12 +[14] 14 +[16] 16 +[18] 18 +[20] 20 + +-- !select_5 -- +\N + +-- !select_6 -- +['a1b'] 2 +['a2b'] 4 +['a3b'] 6 +['a4b'] 8 +['a5b'] 10 +['a6b'] 12 +['a7b'] 14 +['a8b'] 16 +['a9b'] 18 +['a10b'] 20 + +-- !select_7 -- +['a1b1'] 2 +['a2b2'] 4 +['a3b3'] 6 +['a4b4'] 8 +['a5b5'] 10 +['a6b6'] 12 +['a7b7'] 14 +['a8b8'] 16 +['a9b9'] 18 +['a10b10'] 20 + +-- !select_8 -- +\N + +-- !select_9 -- +a1b 2 +a2b 4 +a3b 6 +a4b 8 +a5b 10 +a6b 12 +a7b 14 +a8b 16 +a9b 18 +a10b 20 + +-- !select_10 -- +a1b1 2 +a2b2 4 +a3b3 6 +a4b4 8 +a5b5 10 +a6b6 12 +a7b7 14 +a8b8 16 +a9b9 18 +a10b10 20 + +-- !select_11 -- +\N + +-- !select_12 -- +[2022-01-01 11:11:11] 2 +[2022-01-01 11:11:11] 4 +[2022-01-01 11:11:11] 6 +[2022-01-01 11:11:11] 8 +[2022-01-01 11:11:11] 10 +[2022-01-01 11:11:11] 12 +[2022-01-01 11:11:11] 14 +[2022-01-01 11:11:11] 16 +[2022-01-01 11:11:11] 18 +[2022-01-01 12:12:12] 20 + +-- !select_13 -- +[2022-01-01] 2 +[2022-01-01] 4 +[2022-01-01] 6 +[2022-01-01] 8 +[2022-01-01] 10 +[2022-01-01] 12 +[2022-01-01] 14 +[2022-01-01] 16 +[2022-01-01] 18 +[2022-06-06] 20 + diff --git a/regression-test/java-udf-src/src/main/java/org/apache/doris/udf/ArrayDateTest.java b/regression-test/java-udf-src/src/main/java/org/apache/doris/udf/ArrayDateTest.java new file mode 100644 index 0000000000..8660faea8c --- /dev/null +++ b/regression-test/java-udf-src/src/main/java/org/apache/doris/udf/ArrayDateTest.java @@ -0,0 +1,29 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.udf; + +import org.apache.hadoop.hive.ql.exec.UDF; + +import java.time.LocalDate; +import java.util.ArrayList; + +public class ArrayDateTest extends UDF { + public ArrayList<LocalDate> evaluate(ArrayList<LocalDate> date) { + return date; + } +} diff --git a/regression-test/java-udf-src/src/main/java/org/apache/doris/udf/ArrayDateTimeTest.java b/regression-test/java-udf-src/src/main/java/org/apache/doris/udf/ArrayDateTimeTest.java new file mode 100644 index 0000000000..ed51ccc292 --- /dev/null +++ b/regression-test/java-udf-src/src/main/java/org/apache/doris/udf/ArrayDateTimeTest.java @@ -0,0 +1,29 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.udf; + +import org.apache.hadoop.hive.ql.exec.UDF; + +import java.time.LocalDateTime; +import java.util.ArrayList; + +public class ArrayDateTimeTest extends UDF { + public ArrayList<LocalDateTime> evaluate(ArrayList<LocalDateTime> date) { + return date; + } +} diff --git a/regression-test/java-udf-src/src/main/java/org/apache/doris/udf/ArrayIntTest.java b/regression-test/java-udf-src/src/main/java/org/apache/doris/udf/ArrayIntTest.java new file mode 100644 index 0000000000..4d7f2c636e --- /dev/null +++ b/regression-test/java-udf-src/src/main/java/org/apache/doris/udf/ArrayIntTest.java @@ -0,0 +1,38 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.udf; + +import org.apache.hadoop.hive.ql.exec.UDF; + +import java.util.ArrayList; + +public class ArrayIntTest extends UDF { + public Integer evaluate(ArrayList<Integer> res) { + Integer value = 0; + if (res == null) { + return null; + } + for (int i =0; i < res.size(); ++i) { + Integer data = res.get(i); + if (data != null) { + value = value + data; + } + } + return value; + } +} diff --git a/regression-test/java-udf-src/src/main/java/org/apache/doris/udf/ArrayReturnArrayIntTest.java b/regression-test/java-udf-src/src/main/java/org/apache/doris/udf/ArrayReturnArrayIntTest.java new file mode 100644 index 0000000000..ed9235f911 --- /dev/null +++ b/regression-test/java-udf-src/src/main/java/org/apache/doris/udf/ArrayReturnArrayIntTest.java @@ -0,0 +1,40 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.udf; + +import org.apache.hadoop.hive.ql.exec.UDF; + +import java.util.ArrayList; + +public class ArrayReturnArrayIntTest extends UDF { + public ArrayList<Integer> evaluate(ArrayList<Integer> res) { + Integer value = 0; + if (res == null) { + return null; + } + for (int i =0; i < res.size(); ++i) { + Integer data = res.get(i); + if (data != null) { + value = value + data; + } + } + ArrayList<Integer> result = new ArrayList<Integer>(); + result.add(value); + return result; + } +} diff --git a/regression-test/java-udf-src/src/main/java/org/apache/doris/udf/ArrayReturnArrayStringTest.java b/regression-test/java-udf-src/src/main/java/org/apache/doris/udf/ArrayReturnArrayStringTest.java new file mode 100644 index 0000000000..0f0d73db86 --- /dev/null +++ b/regression-test/java-udf-src/src/main/java/org/apache/doris/udf/ArrayReturnArrayStringTest.java @@ -0,0 +1,41 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.udf; + +import org.apache.hadoop.hive.ql.exec.UDF; + +import java.util.ArrayList; + +public class ArrayReturnArrayStringTest extends UDF { + public ArrayList<String> evaluate(ArrayList<String> res) { + String value = ""; + if (res == null) { + return null; + } + for (int i = 0; i < res.size(); ++i) { + String data = res.get(i); + if (data != null) { + System.out.println(data); + value = value + data; + } + } + ArrayList<String> result = new ArrayList<String>(); + result.add(value); + return result; + } +} diff --git a/regression-test/java-udf-src/src/main/java/org/apache/doris/udf/ArrayStringTest.java b/regression-test/java-udf-src/src/main/java/org/apache/doris/udf/ArrayStringTest.java new file mode 100644 index 0000000000..ea860babac --- /dev/null +++ b/regression-test/java-udf-src/src/main/java/org/apache/doris/udf/ArrayStringTest.java @@ -0,0 +1,39 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.udf; + +import org.apache.hadoop.hive.ql.exec.UDF; + +import java.util.ArrayList; + +public class ArrayStringTest extends UDF { + public String evaluate(ArrayList<String> res) { + String value = ""; + if (res == null) { + return null; + } + for (int i = 0; i < res.size(); ++i) { + String data = res.get(i); + if (data != null) { + System.out.println(data); + value = value + data; + } + } + return value; + } +} diff --git a/regression-test/suites/javaudf_p0/test_javaudf_array.groovy b/regression-test/suites/javaudf_p0/test_javaudf_array.groovy new file mode 100644 index 0000000000..15ecb239fc --- /dev/null +++ b/regression-test/suites/javaudf_p0/test_javaudf_array.groovy @@ -0,0 +1,122 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import org.codehaus.groovy.runtime.IOGroovyMethods + +import java.nio.charset.StandardCharsets +import java.nio.file.Files +import java.nio.file.Paths + +suite("test_javaudf_array") { + def tableName = "test_javaudf_array" + def jarPath = """${context.file.parent}/jars/java-udf-case-jar-with-dependencies.jar""" + + log.info("Jar path: ${jarPath}".toString()) + try { + sql """ DROP TABLE IF EXISTS ${tableName} """ + sql """ + CREATE TABLE IF NOT EXISTS ${tableName} ( + `user_id` INT NOT NULL COMMENT "", + `tinyint_col` TINYINT NOT NULL COMMENT "", + `datev2_col` datev2 NOT NULL COMMENT "", + `datetimev2_col` datetimev2 NOT NULL COMMENT "", + `string_col` STRING NOT NULL COMMENT "" + ) + DISTRIBUTED BY HASH(user_id) PROPERTIES("replication_num" = "1"); + """ + StringBuilder sb = new StringBuilder() + int i = 1 + for (; i < 10; i ++) { + sb.append(""" + (${i},${i}*2,'2022-01-01','2022-01-01 11:11:11','a${i}b'), + """) + } + sb.append(""" + (${i},${i}*2,'2022-06-06','2022-01-01 12:12:12','a${i}b') + """) + sql """ INSERT INTO ${tableName} VALUES + ${sb.toString()} + """ + qt_select_default """ SELECT * FROM ${tableName} t ORDER BY user_id; """ + + File path = new File(jarPath) + if (!path.exists()) { + throw new IllegalStateException("""${jarPath} doesn't exist! """) + } + + sql """ DROP FUNCTION IF EXISTS java_udf_array_int_test(array<int>); """ + sql """ CREATE FUNCTION java_udf_array_int_test(array<int>) RETURNS int PROPERTIES ( + "file"="file://${jarPath}", + "symbol"="org.apache.doris.udf.ArrayIntTest", + "type"="JAVA_UDF" + ); """ + qt_select_1 """ SELECT java_udf_array_int_test(array(user_id)) result FROM ${tableName} ORDER BY result; """ + qt_select_2 """ SELECT java_udf_array_int_test(null) result ; """ + + + sql """ DROP FUNCTION IF EXISTS java_udf_array_return_int_test(array<int>); """ + sql """ CREATE FUNCTION java_udf_array_return_int_test(array<int>) RETURNS array<int> PROPERTIES ( + "file"="file://${jarPath}", + "symbol"="org.apache.doris.udf.ArrayReturnArrayIntTest", + "type"="JAVA_UDF" + ); """ + qt_select_3 """ SELECT java_udf_array_return_int_test(array(user_id)), tinyint_col as result FROM ${tableName} ORDER BY result; """ + qt_select_4 """ SELECT java_udf_array_return_int_test(array(user_id,user_id)), tinyint_col as result FROM ${tableName} ORDER BY result; """ + qt_select_5 """ SELECT java_udf_array_return_int_test(null) result ; """ + + + sql """ DROP FUNCTION IF EXISTS java_udf_array_return_string_test(array<string>); """ + sql """ CREATE FUNCTION java_udf_array_return_string_test(array<string>) RETURNS array<string> PROPERTIES ( + "file"="file://${jarPath}", + "symbol"="org.apache.doris.udf.ArrayReturnArrayStringTest", + "type"="JAVA_UDF" + ); """ + qt_select_6 """ SELECT java_udf_array_return_string_test(array(string_col)), tinyint_col as result FROM ${tableName} ORDER BY result; """ + qt_select_7 """ SELECT java_udf_array_return_string_test(array(string_col, cast(user_id as string))), tinyint_col as result FROM ${tableName} ORDER BY result; """ + qt_select_8 """ SELECT java_udf_array_return_string_test(null) result ; """ + + sql """ DROP FUNCTION IF EXISTS java_udf_array_string_test(array<string>); """ + sql """ CREATE FUNCTION java_udf_array_string_test(array<string>) RETURNS string PROPERTIES ( + "file"="file://${jarPath}", + "symbol"="org.apache.doris.udf.ArrayStringTest", + "type"="JAVA_UDF" + ); """ + qt_select_9 """ SELECT java_udf_array_string_test(array(string_col)), tinyint_col as result FROM ${tableName} ORDER BY result; """ + qt_select_10 """ SELECT java_udf_array_string_test(array(string_col, cast(user_id as string))), tinyint_col as result FROM ${tableName} ORDER BY result; """ + qt_select_11 """ SELECT java_udf_array_string_test(null) result ; """ + + //ArrayDateTimeTest + sql """ DROP FUNCTION IF EXISTS java_udf_array_datatime_test(array<datetime>); """ + sql """ CREATE FUNCTION java_udf_array_datatime_test(array<datetime>) RETURNS array<datetime> PROPERTIES ( + "file"="file://${jarPath}", + "symbol"="org.apache.doris.udf.ArrayDateTimeTest", + "type"="JAVA_UDF" + ); """ + qt_select_12 """ SELECT java_udf_array_datatime_test(array(datetimev2_col)), tinyint_col as result FROM ${tableName} ORDER BY result; """ + + sql """ DROP FUNCTION IF EXISTS java_udf_array_date_test(array<date>); """ + sql """ CREATE FUNCTION java_udf_array_date_test(array<date>) RETURNS array<date> PROPERTIES ( + "file"="file://${jarPath}", + "symbol"="org.apache.doris.udf.ArrayDateTest", + "type"="JAVA_UDF" + ); """ + qt_select_13 """ SELECT java_udf_array_date_test(array(datev2_col)), tinyint_col as result FROM ${tableName} ORDER BY result; """ + + } finally { + try_sql("DROP TABLE IF EXISTS ${tableName}") + } +} diff --git a/run-regression-test.sh b/run-regression-test.sh index 1f6d316dae..a8ea5f976c 100755 --- a/run-regression-test.sh +++ b/run-regression-test.sh @@ -159,11 +159,8 @@ if ! test -f ${RUN_JAR:+${RUN_JAR}}; then mkdir -p "${OUTPUT_DIR}"/{lib,log} cp -r "${REGRESSION_TEST_BUILD_DIR}"/regression-test-*.jar "${OUTPUT_DIR}/lib" -fi -# build jar needed by java-udf case -JAVAUDF_JAR="${DORIS_HOME}/regression-test/java-udf-src/target/java-udf-case-jar-with-dependencies.jar" -if ! test -f ${JAVAUDF_JAR:+${JAVAUDF_JAR}}; then + echo "===== BUILD JAVA_UDF_SRC TO GENERATE JAR =====" mkdir -p "${DORIS_HOME}"/regression-test/suites/javaudf_p0/jars cd "${DORIS_HOME}"/regression-test/java-udf-src "${MVN_CMD}" package --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org