This is an automated email from the ASF dual-hosted git repository.
mbutrovich pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git
The following commit(s) were added to refs/heads/main by this push:
new 2c6a8ac78 chore: Add ANSI mode SQL test files for expressions that throw on invalid input (#3377)
2c6a8ac78 is described below
commit 2c6a8ac7813e66527b7247f28aace530187c2764
Author: Andy Grove <[email protected]>
AuthorDate: Thu Feb 5 17:48:37 2026 -0700
chore: Add ANSI mode SQL test files for expressions that throw on invalid input (#3377)
This PR adds:
1. Framework support for `query expect_error(<pattern>)` mode in the SQL
test
framework, which verifies both Spark and Comet throw exceptions
containing
the given pattern.
2. New ANSI mode test files:
- `math/abs_ansi.sql` - Tests abs overflow on INT_MIN, LONG_MIN, etc.
- `math/arithmetic_ansi.sql` - Tests arithmetic overflow and
divide-by-zero
- `array/get_array_item_ansi.sql` - Tests out-of-bounds array access
(ignored pending #3375)
- `array/element_at_ansi.sql` - Tests out-of-bounds element_at (ignored
pending #3375)
3. Documentation for the new `expect_error` query mode.
Co-authored-by: Claude Opus 4.5 <[email protected]>
---
docs/source/contributor-guide/sql-file-tests.md | 21 +++
.../expressions/array/element_at_ansi.sql | 74 +++++++++
.../expressions/array/get_array_item_ansi.sql | 58 +++++++
.../sql-tests/expressions/math/abs_ansi.sql | 97 ++++++++++++
.../sql-tests/expressions/math/arithmetic_ansi.sql | 167 +++++++++++++++++++++
.../org/apache/comet/CometSqlFileTestSuite.scala | 14 ++
.../scala/org/apache/comet/SqlFileTestParser.scala | 4 +
7 files changed, 435 insertions(+)
diff --git a/docs/source/contributor-guide/sql-file-tests.md
b/docs/source/contributor-guide/sql-file-tests.md
index b2dee3a3b..1a55323b8 100644
--- a/docs/source/contributor-guide/sql-file-tests.md
+++ b/docs/source/contributor-guide/sql-file-tests.md
@@ -194,6 +194,27 @@ query
ignore(https://github.com/apache/datafusion-comet/issues/3326)
SELECT space(n) FROM test_space WHERE n < 0
```
+#### `query expect_error(<pattern>)`
+
+Asserts that both Spark and Comet throw an exception containing the given pattern. Use this
+for ANSI mode tests where invalid operations should throw errors.
+
+```sql
+-- Config: spark.sql.ansi.enabled=true
+
+-- integer overflow should throw in ANSI mode
+query expect_error(ARITHMETIC_OVERFLOW)
+SELECT 2147483647 + 1
+
+-- division by zero should throw in ANSI mode
+query expect_error(DIVIDE_BY_ZERO)
+SELECT 1 / 0
+
+-- array out of bounds should throw in ANSI mode
+query expect_error(INVALID_ARRAY_INDEX)
+SELECT array(1, 2, 3)[10]
+```
+
## Adding a new test
1. Create a `.sql` file under the appropriate subdirectory in
diff --git
a/spark/src/test/resources/sql-tests/expressions/array/element_at_ansi.sql
b/spark/src/test/resources/sql-tests/expressions/array/element_at_ansi.sql
new file mode 100644
index 000000000..0a3f41890
--- /dev/null
+++ b/spark/src/test/resources/sql-tests/expressions/array/element_at_ansi.sql
@@ -0,0 +1,74 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing,
+-- software distributed under the License is distributed on an
+-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+-- KIND, either express or implied. See the License for the
+-- specific language governing permissions and limitations
+-- under the License.
+
+-- ANSI mode element_at tests
+-- Tests that element_at throws exceptions for out-of-bounds access in ANSI mode
+-- Note: element_at uses 1-based indexing
+
+-- Config: spark.sql.ansi.enabled=true
+
+-- ============================================================================
+-- Test data setup
+-- ============================================================================
+
+statement
+CREATE TABLE ansi_element_at_oob(arr array<int>) USING parquet
+
+statement
+INSERT INTO ansi_element_at_oob VALUES (array(1, 2, 3))
+
+-- ============================================================================
+-- element_at index out of bounds (positive index)
+-- Spark throws: [INVALID_ARRAY_INDEX_IN_ELEMENT_AT] ...
+-- Comet throws: Index out of bounds for array
+-- See https://github.com/apache/datafusion-comet/issues/3375
+-- ============================================================================
+
+-- index beyond array length should throw (1-based indexing)
+query ignore(https://github.com/apache/datafusion-comet/issues/3375)
+SELECT element_at(arr, 10) FROM ansi_element_at_oob
+
+-- literal array with out of bounds access
+query ignore(https://github.com/apache/datafusion-comet/issues/3375)
+SELECT element_at(array(1, 2, 3), 5)
+
+-- ============================================================================
+-- element_at with index 0 (invalid)
+-- Spark throws: [INVALID_INDEX_OF_ZERO] The index 0 is invalid
+-- Comet throws: different error message
+-- See https://github.com/apache/datafusion-comet/issues/3375
+-- ============================================================================
+
+-- index 0 is not valid for element_at (1-based indexing)
+query ignore(https://github.com/apache/datafusion-comet/issues/3375)
+SELECT element_at(arr, 0) FROM ansi_element_at_oob
+
+-- literal with index 0
+query ignore(https://github.com/apache/datafusion-comet/issues/3375)
+SELECT element_at(array(1, 2, 3), 0)
+
+-- ============================================================================
+-- element_at index out of bounds (negative index beyond array)
+-- ============================================================================
+
+-- negative index beyond array size should throw
+query ignore(https://github.com/apache/datafusion-comet/issues/3375)
+SELECT element_at(arr, -10) FROM ansi_element_at_oob
+
+-- literal with negative out of bounds
+query ignore(https://github.com/apache/datafusion-comet/issues/3375)
+SELECT element_at(array(1, 2, 3), -5)
diff --git
a/spark/src/test/resources/sql-tests/expressions/array/get_array_item_ansi.sql
b/spark/src/test/resources/sql-tests/expressions/array/get_array_item_ansi.sql
new file mode 100644
index 000000000..19d0cb2a6
--- /dev/null
+++
b/spark/src/test/resources/sql-tests/expressions/array/get_array_item_ansi.sql
@@ -0,0 +1,58 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing,
+-- software distributed under the License is distributed on an
+-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+-- KIND, either express or implied. See the License for the
+-- specific language governing permissions and limitations
+-- under the License.
+
+-- ANSI mode array index access tests
+-- Tests that array[index] throws exceptions for out-of-bounds access in ANSI mode
+
+-- Config: spark.sql.ansi.enabled=true
+
+-- ============================================================================
+-- Test data setup
+-- ============================================================================
+
+statement
+CREATE TABLE ansi_array_oob(arr array<int>) USING parquet
+
+statement
+INSERT INTO ansi_array_oob VALUES (array(1, 2, 3))
+
+-- ============================================================================
+-- Array index out of bounds (positive index)
+-- Spark throws: [INVALID_ARRAY_INDEX] The index X is out of bounds
+-- Comet throws: Index out of bounds for array
+-- See https://github.com/apache/datafusion-comet/issues/3375
+-- ============================================================================
+
+-- index beyond array length should throw (0-based indexing)
+query ignore(https://github.com/apache/datafusion-comet/issues/3375)
+SELECT arr[10] FROM ansi_array_oob
+
+-- literal array with out of bounds access
+query ignore(https://github.com/apache/datafusion-comet/issues/3375)
+SELECT array(1, 2, 3)[5]
+
+-- ============================================================================
+-- Array index out of bounds (negative index)
+-- ============================================================================
+
+-- negative index should throw
+query ignore(https://github.com/apache/datafusion-comet/issues/3375)
+SELECT arr[-1] FROM ansi_array_oob
+
+-- literal with negative index
+query ignore(https://github.com/apache/datafusion-comet/issues/3375)
+SELECT array(1, 2, 3)[-1]
diff --git a/spark/src/test/resources/sql-tests/expressions/math/abs_ansi.sql
b/spark/src/test/resources/sql-tests/expressions/math/abs_ansi.sql
new file mode 100644
index 000000000..c89a2958c
--- /dev/null
+++ b/spark/src/test/resources/sql-tests/expressions/math/abs_ansi.sql
@@ -0,0 +1,97 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing,
+-- software distributed under the License is distributed on an
+-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+-- KIND, either express or implied. See the License for the
+-- specific language governing permissions and limitations
+-- under the License.
+
+-- ANSI mode abs function tests
+-- Tests that abs throws exceptions for overflow on minimum integer values
+
+-- Config: spark.sql.ansi.enabled=true
+
+-- ============================================================================
+-- Test data setup
+-- ============================================================================
+
+statement
+CREATE TABLE ansi_test_abs_int(v int) USING parquet
+
+statement
+INSERT INTO ansi_test_abs_int VALUES (-2147483648)
+
+statement
+CREATE TABLE ansi_test_abs_long(v long) USING parquet
+
+statement
+INSERT INTO ansi_test_abs_long VALUES (-9223372036854775808)
+
+statement
+CREATE TABLE ansi_test_abs_short(v short) USING parquet
+
+statement
+INSERT INTO ansi_test_abs_short VALUES (-32768)
+
+statement
+CREATE TABLE ansi_test_abs_byte(v tinyint) USING parquet
+
+statement
+INSERT INTO ansi_test_abs_byte VALUES (-128)
+
+-- ============================================================================
+-- abs(INT_MIN) overflow
+-- ============================================================================
+
+-- abs(-2147483648) cannot be represented as int (since INT_MAX = 2147483647)
+query expect_error(overflow)
+SELECT abs(v) FROM ansi_test_abs_int
+
+-- literal
+query expect_error(overflow)
+SELECT abs(-2147483648)
+
+-- ============================================================================
+-- abs(LONG_MIN) overflow
+-- ============================================================================
+
+-- abs(-9223372036854775808) cannot be represented as long
+query expect_error(overflow)
+SELECT abs(v) FROM ansi_test_abs_long
+
+-- literal
+query expect_error(overflow)
+SELECT abs(-9223372036854775808L)
+
+-- ============================================================================
+-- abs(SHORT_MIN) overflow
+-- ============================================================================
+
+-- abs(-32768) cannot be represented as short
+query expect_error(overflow)
+SELECT abs(v) FROM ansi_test_abs_short
+
+-- literal
+query expect_error(overflow)
+SELECT abs(cast(-32768 as short))
+
+-- ============================================================================
+-- abs(BYTE_MIN) overflow
+-- ============================================================================
+
+-- abs(-128) cannot be represented as tinyint
+query expect_error(overflow)
+SELECT abs(v) FROM ansi_test_abs_byte
+
+-- literal
+query expect_error(overflow)
+SELECT abs(cast(-128 as tinyint))
diff --git
a/spark/src/test/resources/sql-tests/expressions/math/arithmetic_ansi.sql
b/spark/src/test/resources/sql-tests/expressions/math/arithmetic_ansi.sql
new file mode 100644
index 000000000..093e5a3f3
--- /dev/null
+++ b/spark/src/test/resources/sql-tests/expressions/math/arithmetic_ansi.sql
@@ -0,0 +1,167 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing,
+-- software distributed under the License is distributed on an
+-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+-- KIND, either express or implied. See the License for the
+-- specific language governing permissions and limitations
+-- under the License.
+
+-- ANSI mode arithmetic tests
+-- Tests that ANSI mode throws exceptions for overflow and division by zero
+
+-- Config: spark.sql.ansi.enabled=true
+
+-- ============================================================================
+-- Test data setup for integer overflow
+-- ============================================================================
+
+statement
+CREATE TABLE ansi_int_overflow(a int, b int) USING parquet
+
+statement
+INSERT INTO ansi_int_overflow VALUES (2147483647, 1), (-2147483648, 1), (-2147483648, -1)
+
+statement
+CREATE TABLE ansi_long_overflow(a long, b long) USING parquet
+
+statement
+INSERT INTO ansi_long_overflow VALUES (9223372036854775807, 1), (-9223372036854775808, 1), (-9223372036854775808, -1)
+
+statement
+CREATE TABLE ansi_div_zero(a int, b int, c long, d long) USING parquet
+
+statement
+INSERT INTO ansi_div_zero VALUES (1, 0, 1, 0)
+
+-- ============================================================================
+-- Integer addition overflow
+-- ============================================================================
+
+-- INT_MAX + 1 should overflow
+query expect_error(ARITHMETIC_OVERFLOW)
+SELECT a + b FROM ansi_int_overflow WHERE a = 2147483647
+
+-- literal overflow
+query expect_error(ARITHMETIC_OVERFLOW)
+SELECT 2147483647 + 1
+
+-- ============================================================================
+-- Integer subtraction overflow
+-- ============================================================================
+
+-- INT_MIN - 1 should overflow
+query expect_error(ARITHMETIC_OVERFLOW)
+SELECT a - b FROM ansi_int_overflow WHERE a = -2147483648
+
+-- literal overflow
+query expect_error(ARITHMETIC_OVERFLOW)
+SELECT -2147483648 - 1
+
+-- ============================================================================
+-- Integer multiplication overflow
+-- ============================================================================
+
+-- INT_MAX * 2 should overflow
+query expect_error(ARITHMETIC_OVERFLOW)
+SELECT a * 2 FROM ansi_int_overflow WHERE a = 2147483647
+
+-- literal overflow
+query expect_error(ARITHMETIC_OVERFLOW)
+SELECT 2147483647 * 2
+
+-- ============================================================================
+-- Long addition overflow
+-- ============================================================================
+
+-- LONG_MAX + 1 should overflow
+query expect_error(ARITHMETIC_OVERFLOW)
+SELECT a + b FROM ansi_long_overflow WHERE a = 9223372036854775807
+
+-- ============================================================================
+-- Long subtraction overflow
+-- ============================================================================
+
+-- LONG_MIN - 1 should overflow
+query expect_error(ARITHMETIC_OVERFLOW)
+SELECT a - b FROM ansi_long_overflow WHERE a = -9223372036854775808
+
+-- ============================================================================
+-- Long multiplication overflow
+-- ============================================================================
+
+-- LONG_MAX * 2 should overflow
+query expect_error(ARITHMETIC_OVERFLOW)
+SELECT a * 2 FROM ansi_long_overflow WHERE a = 9223372036854775807
+
+-- ============================================================================
+-- Integer division by zero
+-- ============================================================================
+
+-- column / 0 should throw
+query expect_error(DIVIDE_BY_ZERO)
+SELECT a / b FROM ansi_div_zero
+
+-- column div 0 (integral division) should throw
+query expect_error(DIVIDE_BY_ZERO)
+SELECT a div b FROM ansi_div_zero
+
+-- column % 0 (remainder) should throw
+query expect_error(DIVIDE_BY_ZERO)
+SELECT a % b FROM ansi_div_zero
+
+-- literal / 0 should throw
+query expect_error(DIVIDE_BY_ZERO)
+SELECT 1 / 0
+
+-- literal div 0 should throw
+query expect_error(DIVIDE_BY_ZERO)
+SELECT 1 div 0
+
+-- literal % 0 should throw
+query expect_error(DIVIDE_BY_ZERO)
+SELECT 1 % 0
+
+-- ============================================================================
+-- Long division by zero
+-- ============================================================================
+
+-- long column / 0 should throw
+query expect_error(DIVIDE_BY_ZERO)
+SELECT c / d FROM ansi_div_zero
+
+-- long column div 0 should throw
+query expect_error(DIVIDE_BY_ZERO)
+SELECT c div d FROM ansi_div_zero
+
+-- long column % 0 should throw
+query expect_error(DIVIDE_BY_ZERO)
+SELECT c % d FROM ansi_div_zero
+
+-- ============================================================================
+-- Unary minus overflow
+-- ============================================================================
+
+-- negating INT_MIN should overflow (since INT_MAX is 2147483647, -(-2147483648) cannot fit)
+query expect_error(ARITHMETIC_OVERFLOW)
+SELECT -a FROM ansi_int_overflow WHERE a = -2147483648
+
+-- negating LONG_MIN should overflow
+query expect_error(ARITHMETIC_OVERFLOW)
+SELECT -a FROM ansi_long_overflow WHERE a = -9223372036854775808
+
+-- literal negation overflow
+query expect_error(ARITHMETIC_OVERFLOW)
+SELECT -(-2147483648)
+
+-- literal long negation overflow
+query expect_error(ARITHMETIC_OVERFLOW)
+SELECT -(-9223372036854775808L)
diff --git a/spark/src/test/scala/org/apache/comet/CometSqlFileTestSuite.scala
b/spark/src/test/scala/org/apache/comet/CometSqlFileTestSuite.scala
index 136152ef7..4e3b9e045 100644
--- a/spark/src/test/scala/org/apache/comet/CometSqlFileTestSuite.scala
+++ b/spark/src/test/scala/org/apache/comet/CometSqlFileTestSuite.scala
@@ -101,6 +101,20 @@ class CometSqlFileTestSuite extends CometTestBase with AdaptiveSparkPlanHelper {
           checkSparkAnswerAndFallbackReason(sql, reason)
         case Ignore(reason) =>
           logInfo(s"IGNORED query (${reason}): $sql")
+        case ExpectError(pattern) =>
+          val (sparkError, cometError) = checkSparkAnswerMaybeThrows(spark.sql(sql))
+          assert(
+            sparkError.isDefined,
+            s"Expected Spark to throw an error matching '$pattern' but query succeeded")
+          assert(
+            cometError.isDefined,
+            s"Expected Comet to throw an error matching '$pattern' but query succeeded")
+          assert(
+            sparkError.get.getMessage.contains(pattern),
+            s"Spark error '${sparkError.get.getMessage}' does not contain '$pattern'")
+          assert(
+            cometError.get.getMessage.contains(pattern),
+            s"Comet error '${cometError.get.getMessage}' does not contain '$pattern'")
}
}
}
diff --git a/spark/src/test/scala/org/apache/comet/SqlFileTestParser.scala
b/spark/src/test/scala/org/apache/comet/SqlFileTestParser.scala
index 7a98fd57b..45198ed17 100644
--- a/spark/src/test/scala/org/apache/comet/SqlFileTestParser.scala
+++ b/spark/src/test/scala/org/apache/comet/SqlFileTestParser.scala
@@ -55,6 +55,7 @@ case object SparkAnswerOnly extends QueryAssertionMode
case class WithTolerance(tol: Double) extends QueryAssertionMode
case class ExpectFallback(reason: String) extends QueryAssertionMode
case class Ignore(reason: String) extends QueryAssertionMode
+case class ExpectError(pattern: String) extends QueryAssertionMode
/**
* Parsed representation of a .sql test file.
@@ -145,6 +146,7 @@ object SqlFileTestParser {
private val FallbackPattern = """query\s+expect_fallback\((.+)\)""".r
private val IgnorePattern = """query\s+ignore\((.+)\)""".r
+ private val ErrorPattern = """query\s+expect_error\((.+)\)""".r
  private def parseQueryAssertionMode(directive: String): QueryAssertionMode = {
directive match {
@@ -152,6 +154,8 @@ object SqlFileTestParser {
ExpectFallback(reason.trim)
case IgnorePattern(reason) =>
Ignore(reason.trim)
+ case ErrorPattern(pattern) =>
+ ExpectError(pattern.trim)
case _ =>
val parts = directive.split("\\s+")
if (parts.length == 1) return CheckCoverageAndAnswer
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]