This is an automated email from the ASF dual-hosted git repository.
comphead pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git
The following commit(s) were added to refs/heads/main by this push:
new 77bd8e070 fix: correct array_append return type and mark as Compatible (#3795)
77bd8e070 is described below
commit 77bd8e070226e6883eec31eea6c14c4ff7a2412d
Author: Andy Grove <[email protected]>
AuthorDate: Fri Mar 27 13:36:45 2026 -0600
fix: correct array_append return type and mark as Compatible (#3795)
---
docs/source/user-guide/latest/expressions.md | 2 +-
spark/src/main/scala/org/apache/comet/serde/arrays.scala | 7 +++++--
.../test/resources/sql-tests/expressions/array/array_append.sql | 3 ++-
3 files changed, 8 insertions(+), 4 deletions(-)
diff --git a/docs/source/user-guide/latest/expressions.md b/docs/source/user-guide/latest/expressions.md
index c8e5475d0..c3aca6f67 100644
--- a/docs/source/user-guide/latest/expressions.md
+++ b/docs/source/user-guide/latest/expressions.md
@@ -232,7 +232,7 @@ Comet supports using the following aggregate functions within window contexts wi
| Expression | Spark-Compatible? | Compatibility Notes
|
| -------------- | ----------------- |
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
-| ArrayAppend | No |
|
+| ArrayAppend | Yes |
|
| ArrayCompact | No |
|
| ArrayContains | No | Returns null instead of false for empty arrays with literal values ([#3346](https://github.com/apache/datafusion-comet/issues/3346)) |
| ArrayDistinct | No | Behaves differently than spark. Comet first sorts then removes duplicates while Spark preserves the original order. |
diff --git a/spark/src/main/scala/org/apache/comet/serde/arrays.scala b/spark/src/main/scala/org/apache/comet/serde/arrays.scala
index 298c47308..5d10ff8a3 100644
--- a/spark/src/main/scala/org/apache/comet/serde/arrays.scala
+++ b/spark/src/main/scala/org/apache/comet/serde/arrays.scala
@@ -88,7 +88,7 @@ object CometArrayRemove
object CometArrayAppend extends CometExpressionSerde[ArrayAppend] {
- override def getSupportLevel(expr: ArrayAppend): SupportLevel = Incompatible(None)
+ override def getSupportLevel(expr: ArrayAppend): SupportLevel = Compatible()
override def convert(
expr: ArrayAppend,
@@ -100,10 +100,13 @@ object CometArrayAppend extends CometExpressionSerde[ArrayAppend] {
val arrayExprProto = exprToProto(expr.children.head, inputs, binding)
val keyExprProto = exprToProto(expr.children(1), inputs, binding)
+ // DataFusion's array_append always returns a list with nullable elements,
+ // so we must promise ArrayType(elementType, containsNull = true) here even if
+ // Spark's expr.dataType has containsNull = false (e.g. for array(1,2,3)).
val arrayAppendScalarExpr =
scalarFunctionExprToProtoWithReturnType(
"array_append",
- ArrayType(elementType = elementType),
+ ArrayType(elementType, containsNull = true),
false,
arrayExprProto,
keyExprProto)
diff --git a/spark/src/test/resources/sql-tests/expressions/array/array_append.sql b/spark/src/test/resources/sql-tests/expressions/array/array_append.sql
index 0020af3f3..53f259340 100644
--- a/spark/src/test/resources/sql-tests/expressions/array/array_append.sql
+++ b/spark/src/test/resources/sql-tests/expressions/array/array_append.sql
@@ -15,8 +15,9 @@
-- specific language governing permissions and limitations
-- under the License.
+-- On Spark 4.0, array_append is a RuntimeReplaceable that rewrites to array_insert(-1),
+-- so we need to allow the incompatible array_insert to run natively there.
-- Config: spark.comet.expression.ArrayInsert.allowIncompatible=true
--- Config: spark.comet.expression.ArrayAppend.allowIncompatible=true
-- ConfigMatrix: parquet.enable.dictionary=false,true
statement
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]