xinrong-meng commented on code in PR #49786: URL: https://github.com/apache/spark/pull/49786#discussion_r1942053421
########## sql/core/src/main/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFs.scala: ########## @@ -268,12 +270,26 @@ object ExtractPythonUDFs extends Rule[LogicalPlan] { evalTypes.mkString(",")) } val evalType = evalTypes.head + + val hasUDTInput = child.output.exists( + attr => attr.dataType.isInstanceOf[UserDefinedType[_]]) + val hasUDTReturn = validUdfs.exists(udf => udf.dataType.isInstanceOf[UserDefinedType[_]]) + val evaluation = evalType match { case PythonEvalType.SQL_BATCHED_UDF => BatchEvalPython(validUdfs, resultAttrs, child) - case PythonEvalType.SQL_SCALAR_PANDAS_UDF | PythonEvalType.SQL_SCALAR_PANDAS_ITER_UDF - | PythonEvalType.SQL_ARROW_BATCHED_UDF => + case PythonEvalType.SQL_SCALAR_PANDAS_UDF | PythonEvalType.SQL_SCALAR_PANDAS_ITER_UDF => ArrowEvalPython(validUdfs, resultAttrs, child, evalType) + case PythonEvalType.SQL_ARROW_BATCHED_UDF => + + if (hasUDTInput || hasUDTReturn) { + // Use BatchEvalPython if UDT is detected + logWarning("Arrow optimization disabled due to UDT input or return type. " + + "Falling back to non-Arrow-optimized UDF execution.") Review Comment: Adjusted, thank you! -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org