This is an automated email from the ASF dual-hosted git repository.

mbutrovich pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git


The following commit(s) were added to refs/heads/main by this push:
     new 2c6a8ac78 chore: Add ANSI mode SQL test files for expressions that 
throw on invalid input (#3377)
2c6a8ac78 is described below

commit 2c6a8ac7813e66527b7247f28aace530187c2764
Author: Andy Grove <[email protected]>
AuthorDate: Thu Feb 5 17:48:37 2026 -0700

    chore: Add ANSI mode SQL test files for expressions that throw on invalid 
input (#3377)
    
    This PR adds:
    
    1. Framework support for `query expect_error(<pattern>)` mode in the SQL 
test
       framework, which verifies both Spark and Comet throw exceptions 
containing
       the given pattern.
    
    2. New ANSI mode test files:
       - `math/abs_ansi.sql` - Tests abs overflow on INT_MIN, LONG_MIN, etc.
       - `math/arithmetic_ansi.sql` - Tests arithmetic overflow and 
divide-by-zero
       - `array/get_array_item_ansi.sql` - Tests out-of-bounds array access 
(ignored pending #3375)
       - `array/element_at_ansi.sql` - Tests out-of-bounds element_at (ignored 
pending #3375)
    
    3. Documentation for the new `expect_error` query mode.
    
    Co-authored-by: Claude Opus 4.5 <[email protected]>
---
 docs/source/contributor-guide/sql-file-tests.md    |  21 +++
 .../expressions/array/element_at_ansi.sql          |  74 +++++++++
 .../expressions/array/get_array_item_ansi.sql      |  58 +++++++
 .../sql-tests/expressions/math/abs_ansi.sql        |  97 ++++++++++++
 .../sql-tests/expressions/math/arithmetic_ansi.sql | 167 +++++++++++++++++++++
 .../org/apache/comet/CometSqlFileTestSuite.scala   |  14 ++
 .../scala/org/apache/comet/SqlFileTestParser.scala |   4 +
 7 files changed, 435 insertions(+)

diff --git a/docs/source/contributor-guide/sql-file-tests.md 
b/docs/source/contributor-guide/sql-file-tests.md
index b2dee3a3b..1a55323b8 100644
--- a/docs/source/contributor-guide/sql-file-tests.md
+++ b/docs/source/contributor-guide/sql-file-tests.md
@@ -194,6 +194,27 @@ query 
ignore(https://github.com/apache/datafusion-comet/issues/3326)
 SELECT space(n) FROM test_space WHERE n < 0
 ```
 
+#### `query expect_error(<pattern>)`
+
+Asserts that both Spark and Comet throw an exception whose message contains the given 
pattern (a plain, case-sensitive substring match, not a regular expression). Use this
+for ANSI mode tests where invalid operations should throw errors.
+
+```sql
+-- Config: spark.sql.ansi.enabled=true
+
+-- integer overflow should throw in ANSI mode
+query expect_error(ARITHMETIC_OVERFLOW)
+SELECT 2147483647 + 1
+
+-- division by zero should throw in ANSI mode
+query expect_error(DIVIDE_BY_ZERO)
+SELECT 1 / 0
+
+-- array out of bounds should throw in ANSI mode (Comet's message currently differs; see issue #3375)
+query expect_error(INVALID_ARRAY_INDEX)
+SELECT array(1, 2, 3)[10]
+```
+
 ## Adding a new test
 
 1. Create a `.sql` file under the appropriate subdirectory in
diff --git 
a/spark/src/test/resources/sql-tests/expressions/array/element_at_ansi.sql 
b/spark/src/test/resources/sql-tests/expressions/array/element_at_ansi.sql
new file mode 100644
index 000000000..0a3f41890
--- /dev/null
+++ b/spark/src/test/resources/sql-tests/expressions/array/element_at_ansi.sql
@@ -0,0 +1,74 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements.  See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership.  The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License.  You may obtain a copy of the License at
+--
+--   http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing,
+-- software distributed under the License is distributed on an
+-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+-- KIND, either express or implied.  See the License for the
+-- specific language governing permissions and limitations
+-- under the License.
+
+-- ANSI mode element_at tests
+-- Tests that element_at throws exceptions for out-of-bounds access in ANSI 
mode
+-- Note: element_at uses 1-based indexing
+
+-- Config: spark.sql.ansi.enabled=true
+
+-- ============================================================================
+-- Test data setup
+-- ============================================================================
+
+statement
+CREATE TABLE ansi_element_at_oob(arr array<int>) USING parquet
+
+statement
+INSERT INTO ansi_element_at_oob VALUES (array(1, 2, 3))
+
+-- ============================================================================
+-- element_at index out of bounds (positive index)
+-- Spark throws: [INVALID_ARRAY_INDEX_IN_ELEMENT_AT] ...
+-- Comet throws: Index out of bounds for array
+-- See https://github.com/apache/datafusion-comet/issues/3375
+-- ============================================================================
+
+-- index beyond array length should throw (1-based indexing)
+query ignore(https://github.com/apache/datafusion-comet/issues/3375)
+SELECT element_at(arr, 10) FROM ansi_element_at_oob
+
+-- literal array with out of bounds access
+query ignore(https://github.com/apache/datafusion-comet/issues/3375)
+SELECT element_at(array(1, 2, 3), 5)
+
+-- ============================================================================
+-- element_at with index 0 (invalid)
+-- Spark throws: [INVALID_INDEX_OF_ZERO] The index 0 is invalid
+-- Comet throws: different error message
+-- See https://github.com/apache/datafusion-comet/issues/3375
+-- ============================================================================
+
+-- index 0 is not valid for element_at (1-based indexing)
+query ignore(https://github.com/apache/datafusion-comet/issues/3375)
+SELECT element_at(arr, 0) FROM ansi_element_at_oob
+
+-- literal with index 0
+query ignore(https://github.com/apache/datafusion-comet/issues/3375)
+SELECT element_at(array(1, 2, 3), 0)
+
+-- ============================================================================
+-- element_at index out of bounds (negative index beyond array)
+-- ============================================================================
+
+-- negative index beyond array size should throw
+query ignore(https://github.com/apache/datafusion-comet/issues/3375)
+SELECT element_at(arr, -10) FROM ansi_element_at_oob
+
+-- literal with negative out of bounds
+query ignore(https://github.com/apache/datafusion-comet/issues/3375)
+SELECT element_at(array(1, 2, 3), -5)
diff --git 
a/spark/src/test/resources/sql-tests/expressions/array/get_array_item_ansi.sql 
b/spark/src/test/resources/sql-tests/expressions/array/get_array_item_ansi.sql
new file mode 100644
index 000000000..19d0cb2a6
--- /dev/null
+++ 
b/spark/src/test/resources/sql-tests/expressions/array/get_array_item_ansi.sql
@@ -0,0 +1,58 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements.  See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership.  The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License.  You may obtain a copy of the License at
+--
+--   http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing,
+-- software distributed under the License is distributed on an
+-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+-- KIND, either express or implied.  See the License for the
+-- specific language governing permissions and limitations
+-- under the License.
+
+-- ANSI mode array index access tests
+-- Tests that array[index] throws exceptions for out-of-bounds access in ANSI 
mode
+
+-- Config: spark.sql.ansi.enabled=true
+
+-- ============================================================================
+-- Test data setup
+-- ============================================================================
+
+statement
+CREATE TABLE ansi_array_oob(arr array<int>) USING parquet
+
+statement
+INSERT INTO ansi_array_oob VALUES (array(1, 2, 3))
+
+-- ============================================================================
+-- Array index out of bounds (positive index)
+-- Spark throws: [INVALID_ARRAY_INDEX] The index X is out of bounds
+-- Comet throws: Index out of bounds for array
+-- See https://github.com/apache/datafusion-comet/issues/3375
+-- ============================================================================
+
+-- index beyond array length should throw (0-based indexing)
+query ignore(https://github.com/apache/datafusion-comet/issues/3375)
+SELECT arr[10] FROM ansi_array_oob
+
+-- literal array with out of bounds access
+query ignore(https://github.com/apache/datafusion-comet/issues/3375)
+SELECT array(1, 2, 3)[5]
+
+-- ============================================================================
+-- Array index out of bounds (negative index)
+-- ============================================================================
+
+-- negative index should throw
+query ignore(https://github.com/apache/datafusion-comet/issues/3375)
+SELECT arr[-1] FROM ansi_array_oob
+
+-- literal with negative index
+query ignore(https://github.com/apache/datafusion-comet/issues/3375)
+SELECT array(1, 2, 3)[-1]
diff --git a/spark/src/test/resources/sql-tests/expressions/math/abs_ansi.sql 
b/spark/src/test/resources/sql-tests/expressions/math/abs_ansi.sql
new file mode 100644
index 000000000..c89a2958c
--- /dev/null
+++ b/spark/src/test/resources/sql-tests/expressions/math/abs_ansi.sql
@@ -0,0 +1,97 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements.  See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership.  The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License.  You may obtain a copy of the License at
+--
+--   http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing,
+-- software distributed under the License is distributed on an
+-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+-- KIND, either express or implied.  See the License for the
+-- specific language governing permissions and limitations
+-- under the License.
+
+-- ANSI mode abs function tests
+-- Tests that abs throws exceptions for overflow on minimum integer values
+
+-- Config: spark.sql.ansi.enabled=true
+
+-- ============================================================================
+-- Test data setup
+-- ============================================================================
+
+statement
+CREATE TABLE ansi_test_abs_int(v int) USING parquet
+
+statement
+INSERT INTO ansi_test_abs_int VALUES (-2147483648)
+
+statement
+CREATE TABLE ansi_test_abs_long(v long) USING parquet
+
+statement
+INSERT INTO ansi_test_abs_long VALUES (-9223372036854775808)
+
+statement
+CREATE TABLE ansi_test_abs_short(v short) USING parquet
+
+statement
+INSERT INTO ansi_test_abs_short VALUES (-32768)
+
+statement
+CREATE TABLE ansi_test_abs_byte(v tinyint) USING parquet
+
+statement
+INSERT INTO ansi_test_abs_byte VALUES (-128)
+
+-- ============================================================================
+-- abs(INT_MIN) overflow
+-- ============================================================================
+
+-- abs(-2147483648) cannot be represented as int (since INT_MAX = 2147483647)
+query expect_error(overflow)
+SELECT abs(v) FROM ansi_test_abs_int
+
+-- literal
+query expect_error(overflow)
+SELECT abs(-2147483648)
+
+-- ============================================================================
+-- abs(LONG_MIN) overflow
+-- ============================================================================
+
+-- abs(-9223372036854775808) cannot be represented as long
+query expect_error(overflow)
+SELECT abs(v) FROM ansi_test_abs_long
+
+-- literal
+query expect_error(overflow)
+SELECT abs(-9223372036854775808L)
+
+-- ============================================================================
+-- abs(SHORT_MIN) overflow
+-- ============================================================================
+
+-- abs(-32768) cannot be represented as short
+query expect_error(overflow)
+SELECT abs(v) FROM ansi_test_abs_short
+
+-- literal
+query expect_error(overflow)
+SELECT abs(cast(-32768 as short))
+
+-- ============================================================================
+-- abs(BYTE_MIN) overflow
+-- ============================================================================
+
+-- abs(-128) cannot be represented as tinyint
+query expect_error(overflow)
+SELECT abs(v) FROM ansi_test_abs_byte
+
+-- literal
+query expect_error(overflow)
+SELECT abs(cast(-128 as tinyint))
diff --git 
a/spark/src/test/resources/sql-tests/expressions/math/arithmetic_ansi.sql 
b/spark/src/test/resources/sql-tests/expressions/math/arithmetic_ansi.sql
new file mode 100644
index 000000000..093e5a3f3
--- /dev/null
+++ b/spark/src/test/resources/sql-tests/expressions/math/arithmetic_ansi.sql
@@ -0,0 +1,167 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements.  See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership.  The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License.  You may obtain a copy of the License at
+--
+--   http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing,
+-- software distributed under the License is distributed on an
+-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+-- KIND, either express or implied.  See the License for the
+-- specific language governing permissions and limitations
+-- under the License.
+
+-- ANSI mode arithmetic tests
+-- Tests that ANSI mode throws exceptions for overflow and division by zero
+
+-- Config: spark.sql.ansi.enabled=true
+
+-- ============================================================================
+-- Test data setup for integer overflow
+-- ============================================================================
+
+statement
+CREATE TABLE ansi_int_overflow(a int, b int) USING parquet
+
+statement
+INSERT INTO ansi_int_overflow VALUES (2147483647, 1), (-2147483648, 1), 
(-2147483648, -1)
+
+statement
+CREATE TABLE ansi_long_overflow(a long, b long) USING parquet
+
+statement
+INSERT INTO ansi_long_overflow VALUES (9223372036854775807, 1), 
(-9223372036854775808, 1), (-9223372036854775808, -1)
+
+statement
+CREATE TABLE ansi_div_zero(a int, b int, c long, d long) USING parquet
+
+statement
+INSERT INTO ansi_div_zero VALUES (1, 0, 1, 0)
+
+-- ============================================================================
+-- Integer addition overflow
+-- ============================================================================
+
+-- INT_MAX + 1 should overflow
+query expect_error(ARITHMETIC_OVERFLOW)
+SELECT a + b FROM ansi_int_overflow WHERE a = 2147483647
+
+-- literal overflow
+query expect_error(ARITHMETIC_OVERFLOW)
+SELECT 2147483647 + 1
+
+-- ============================================================================
+-- Integer subtraction overflow
+-- ============================================================================
+
+-- INT_MIN - 1 should overflow
+query expect_error(ARITHMETIC_OVERFLOW)
+SELECT a - b FROM ansi_int_overflow WHERE a = -2147483648
+
+-- literal overflow
+query expect_error(ARITHMETIC_OVERFLOW)
+SELECT -2147483648 - 1
+
+-- ============================================================================
+-- Integer multiplication overflow
+-- ============================================================================
+
+-- INT_MAX * 2 should overflow
+query expect_error(ARITHMETIC_OVERFLOW)
+SELECT a * 2 FROM ansi_int_overflow WHERE a = 2147483647
+
+-- literal overflow
+query expect_error(ARITHMETIC_OVERFLOW)
+SELECT 2147483647 * 2
+
+-- ============================================================================
+-- Long addition overflow
+-- ============================================================================
+
+-- LONG_MAX + 1 should overflow
+query expect_error(ARITHMETIC_OVERFLOW)
+SELECT a + b FROM ansi_long_overflow WHERE a = 9223372036854775807
+
+-- ============================================================================
+-- Long subtraction overflow
+-- ============================================================================
+
+-- LONG_MIN - 1 should overflow
+query expect_error(ARITHMETIC_OVERFLOW)
+SELECT a - b FROM ansi_long_overflow WHERE a = -9223372036854775808
+
+-- ============================================================================
+-- Long multiplication overflow
+-- ============================================================================
+
+-- LONG_MAX * 2 should overflow
+query expect_error(ARITHMETIC_OVERFLOW)
+SELECT a * 2 FROM ansi_long_overflow WHERE a = 9223372036854775807
+
+-- ============================================================================
+-- Integer division by zero
+-- ============================================================================
+
+-- column / 0 should throw
+query expect_error(DIVIDE_BY_ZERO)
+SELECT a / b FROM ansi_div_zero
+
+-- column div 0 (integral division) should throw
+query expect_error(DIVIDE_BY_ZERO)
+SELECT a div b FROM ansi_div_zero
+
+-- column % 0 (remainder) should throw
+query expect_error(DIVIDE_BY_ZERO)
+SELECT a % b FROM ansi_div_zero
+
+-- literal / 0 should throw
+query expect_error(DIVIDE_BY_ZERO)
+SELECT 1 / 0
+
+-- literal div 0 should throw
+query expect_error(DIVIDE_BY_ZERO)
+SELECT 1 div 0
+
+-- literal % 0 should throw
+query expect_error(DIVIDE_BY_ZERO)
+SELECT 1 % 0
+
+-- ============================================================================
+-- Long division by zero
+-- ============================================================================
+
+-- long column / 0 should throw
+query expect_error(DIVIDE_BY_ZERO)
+SELECT c / d FROM ansi_div_zero
+
+-- long column div 0 should throw
+query expect_error(DIVIDE_BY_ZERO)
+SELECT c div d FROM ansi_div_zero
+
+-- long column % 0 should throw
+query expect_error(DIVIDE_BY_ZERO)
+SELECT c % d FROM ansi_div_zero
+
+-- ============================================================================
+-- Unary minus overflow
+-- ============================================================================
+
+-- negating INT_MIN should overflow (since INT_MAX is 2147483647, 
-(-2147483648) cannot fit)
+query expect_error(ARITHMETIC_OVERFLOW)
+SELECT -a FROM ansi_int_overflow WHERE a = -2147483648
+
+-- negating LONG_MIN should overflow
+query expect_error(ARITHMETIC_OVERFLOW)
+SELECT -a FROM ansi_long_overflow WHERE a = -9223372036854775808
+
+-- literal negation overflow
+query expect_error(ARITHMETIC_OVERFLOW)
+SELECT -(-2147483648)
+
+-- literal long negation overflow
+query expect_error(ARITHMETIC_OVERFLOW)
+SELECT -(-9223372036854775808L)
diff --git a/spark/src/test/scala/org/apache/comet/CometSqlFileTestSuite.scala 
b/spark/src/test/scala/org/apache/comet/CometSqlFileTestSuite.scala
index 136152ef7..4e3b9e045 100644
--- a/spark/src/test/scala/org/apache/comet/CometSqlFileTestSuite.scala
+++ b/spark/src/test/scala/org/apache/comet/CometSqlFileTestSuite.scala
@@ -101,6 +101,20 @@ class CometSqlFileTestSuite extends CometTestBase with 
AdaptiveSparkPlanHelper {
                   checkSparkAnswerAndFallbackReason(sql, reason)
                 case Ignore(reason) =>
                   logInfo(s"IGNORED query (${reason}): $sql")
+                case ExpectError(pattern) =>
+                  val (sparkError, cometError) = 
checkSparkAnswerMaybeThrows(spark.sql(sql))
+                  assert(
+                    sparkError.isDefined,
+                    s"Expected Spark to throw an error matching '$pattern' but 
query succeeded")
+                  assert(
+                    cometError.isDefined,
+                    s"Expected Comet to throw an error matching '$pattern' but 
query succeeded")
+                  assert(
+                    sparkError.get.getMessage.contains(pattern),
+                    s"Spark error '${sparkError.get.getMessage}' does not 
contain '$pattern'")
+                  assert(
+                    cometError.get.getMessage.contains(pattern),
+                    s"Comet error '${cometError.get.getMessage}' does not 
contain '$pattern'")
               }
             }
         }
diff --git a/spark/src/test/scala/org/apache/comet/SqlFileTestParser.scala 
b/spark/src/test/scala/org/apache/comet/SqlFileTestParser.scala
index 7a98fd57b..45198ed17 100644
--- a/spark/src/test/scala/org/apache/comet/SqlFileTestParser.scala
+++ b/spark/src/test/scala/org/apache/comet/SqlFileTestParser.scala
@@ -55,6 +55,7 @@ case object SparkAnswerOnly extends QueryAssertionMode
 case class WithTolerance(tol: Double) extends QueryAssertionMode
 case class ExpectFallback(reason: String) extends QueryAssertionMode
 case class Ignore(reason: String) extends QueryAssertionMode
+case class ExpectError(pattern: String) extends QueryAssertionMode
 
 /**
  * Parsed representation of a .sql test file.
@@ -145,6 +146,7 @@ object SqlFileTestParser {
 
   private val FallbackPattern = """query\s+expect_fallback\((.+)\)""".r
   private val IgnorePattern = """query\s+ignore\((.+)\)""".r
+  private val ErrorPattern = """query\s+expect_error\((.+)\)""".r
 
   private def parseQueryAssertionMode(directive: String): QueryAssertionMode = 
{
     directive match {
@@ -152,6 +154,8 @@ object SqlFileTestParser {
         ExpectFallback(reason.trim)
       case IgnorePattern(reason) =>
         Ignore(reason.trim)
+      case ErrorPattern(pattern) =>
+        ExpectError(pattern.trim)
       case _ =>
         val parts = directive.split("\\s+")
         if (parts.length == 1) return CheckCoverageAndAnswer


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to