This is an automated email from the ASF dual-hosted git repository.
github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 98cc75361f Adds support for ANSI mode in negative function (#20189)
98cc75361f is described below
commit 98cc75361fb0909ce27f56cbd664bc25b26ccbf4
Author: Subham Singhal <[email protected]>
AuthorDate: Sat Feb 14 23:52:04 2026 +0530
Adds support for ANSI mode in negative function (#20189)
## Which issue does this PR close?
https://github.com/apache/datafusion/issues/20034
## Rationale for this change
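The Spark `negative` function previously always wrapped on overflow (legacy behavior); it needs ANSI mode support so that overflow raises an error instead when ANSI mode is enabled.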
## What changes are included in this PR?
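Added ANSI mode support to the `negative` function: when `datafusion.execution.enable_ansi_mode` is true, negation is checked and returns an overflow error; otherwise it keeps the existing wrapping behavior.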
## Are these changes tested?
Yes, through unit tests (UT), including new ANSI-mode cases in `negative.slt`.
## Are there any user-facing changes?
Yes, this adds ANSI mode support to the existing `negative` function.
---------
Co-authored-by: Subham Singhal <[email protected]>
---
datafusion/spark/src/function/math/negative.rs | 430 +++++++++++++++------
.../test_files/spark/math/negative.slt | 52 +++
2 files changed, 359 insertions(+), 123 deletions(-)
diff --git a/datafusion/spark/src/function/math/negative.rs
b/datafusion/spark/src/function/math/negative.rs
index f1803d2d77..2df71b709d 100644
--- a/datafusion/spark/src/function/math/negative.rs
+++ b/datafusion/spark/src/function/math/negative.rs
@@ -20,7 +20,7 @@ use arrow::array::*;
use arrow::datatypes::{DataType, IntervalDayTime, IntervalMonthDayNano,
IntervalUnit};
use bigdecimal::num_traits::WrappingNeg;
use datafusion_common::utils::take_function_args;
-use datafusion_common::{Result, ScalarValue, not_impl_err};
+use datafusion_common::{Result, ScalarValue, exec_err, not_impl_err};
use datafusion_expr::{
ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, TypeSignature,
Volatility,
@@ -34,14 +34,12 @@ use std::sync::Arc;
/// Returns the negation of input (equivalent to unary minus)
/// Returns NULL if input is NULL, returns NaN if input is NaN.
///
-/// ANSI mode support see
(<https://github.com/apache/datafusion/issues/20034>):
-/// - Spark's ANSI-compliant dialect, when off (i.e.
`spark.sql.ansi.enabled=false`),
-/// negating the minimal value of a signed integer wraps around.
-/// For example: negative(i32::MIN) returns i32::MIN (wraps instead of
error).
-/// This is the current implementation (legacy mode only).
-/// - Spark's ANSI mode (when `spark.sql.ansi.enabled=true`) should throw an
-/// ARITHMETIC_OVERFLOW error on integer overflow instead of wrapping.
-/// This is not yet implemented - all operations currently use wrapping
behavior.
+/// ANSI mode support:
+/// - When ANSI mode is disabled (`spark.sql.ansi.enabled=false`), negating
the minimal
+/// value of a signed integer wraps around. For example: negative(i32::MIN)
returns
+/// i32::MIN (wraps instead of error).
+/// - When ANSI mode is enabled (`spark.sql.ansi.enabled=true`), overflow
conditions
+/// throw an ARITHMETIC_OVERFLOW error instead of wrapping.
///
#[derive(Debug, PartialEq, Eq, Hash)]
pub struct SparkNegative {
@@ -96,107 +94,231 @@ impl ScalarUDFImpl for SparkNegative {
}
fn invoke_with_args(&self, args: ScalarFunctionArgs) ->
Result<ColumnarValue> {
- spark_negative(&args.args)
+ spark_negative(&args.args,
args.config_options.execution.enable_ansi_mode)
}
}
+/// Macro to implement negation for integer array types
+macro_rules! impl_integer_array_negative {
+ ($array:expr, $type:ty, $type_name:expr, $enable_ansi_mode:expr) => {{
+ let array = $array.as_primitive::<$type>();
+ let result: PrimitiveArray<$type> = if $enable_ansi_mode {
+ array.try_unary(|x| {
+ x.checked_neg().ok_or_else(|| {
+ (exec_err!("{} overflow on negative({x})", $type_name)
+ as Result<(), _>)
+ .unwrap_err()
+ })
+ })?
+ } else {
+ array.unary(|x| x.wrapping_neg())
+ };
+ Ok(ColumnarValue::Array(Arc::new(result)))
+ }};
+}
+
+/// Macro to implement negation for float array types
+macro_rules! impl_float_array_negative {
+ ($array:expr, $type:ty) => {{
+ let array = $array.as_primitive::<$type>();
+ let result: PrimitiveArray<$type> = array.unary(|x| -x);
+ Ok(ColumnarValue::Array(Arc::new(result)))
+ }};
+}
+
+/// Macro to implement negation for decimal array types
+macro_rules! impl_decimal_array_negative {
+ ($array:expr, $type:ty, $type_name:expr, $enable_ansi_mode:expr) => {{
+ let array = $array.as_primitive::<$type>();
+ let result: PrimitiveArray<$type> = if $enable_ansi_mode {
+ array
+ .try_unary(|x| {
+ x.checked_neg().ok_or_else(|| {
+ (exec_err!("{} overflow on negative({x})", $type_name)
+ as Result<(), _>)
+ .unwrap_err()
+ })
+ })?
+ .with_data_type(array.data_type().clone())
+ } else {
+ array.unary(|x| x.wrapping_neg())
+ };
+ Ok(ColumnarValue::Array(Arc::new(result)))
+ }};
+}
+
+/// Macro to implement negation for integer scalar types
+macro_rules! impl_integer_scalar_negative {
+ ($v:expr, $type_name:expr, $variant:ident, $enable_ansi_mode:expr) => {{
+ let result = if $enable_ansi_mode {
+ $v.checked_neg().ok_or_else(|| {
+ (exec_err!("{} overflow on negative({})", $type_name, $v)
+ as Result<(), _>)
+ .unwrap_err()
+ })?
+ } else {
+ $v.wrapping_neg()
+ };
+ Ok(ColumnarValue::Scalar(ScalarValue::$variant(Some(result))))
+ }};
+}
+
+/// Macro to implement negation for decimal scalar types
+macro_rules! impl_decimal_scalar_negative {
+ ($v:expr, $precision:expr, $scale:expr, $type_name:expr, $variant:ident,
$enable_ansi_mode:expr) => {{
+ let result = if $enable_ansi_mode {
+ $v.checked_neg().ok_or_else(|| {
+ (exec_err!("{} overflow on negative({})", $type_name, $v)
+ as Result<(), _>)
+ .unwrap_err()
+ })?
+ } else {
+ $v.wrapping_neg()
+ };
+ Ok(ColumnarValue::Scalar(ScalarValue::$variant(
+ Some(result),
+ *$precision,
+ *$scale,
+ )))
+ }};
+}
+
/// Core implementation of Spark's negative function
-fn spark_negative(args: &[ColumnarValue]) -> Result<ColumnarValue> {
+fn spark_negative(
+ args: &[ColumnarValue],
+ enable_ansi_mode: bool,
+) -> Result<ColumnarValue> {
let [arg] = take_function_args("negative", args)?;
match arg {
ColumnarValue::Array(array) => match array.data_type() {
DataType::Null => Ok(arg.clone()),
- // Signed integers - use wrapping negation (Spark legacy mode
behavior)
+ // Signed integers - use checked negation in ANSI mode, wrapping
in legacy mode
DataType::Int8 => {
- let array = array.as_primitive::<Int8Type>();
- let result: PrimitiveArray<Int8Type> = array.unary(|x|
x.wrapping_neg());
- Ok(ColumnarValue::Array(Arc::new(result)))
+ impl_integer_array_negative!(array, Int8Type, "Int8",
enable_ansi_mode)
}
DataType::Int16 => {
- let array = array.as_primitive::<Int16Type>();
- let result: PrimitiveArray<Int16Type> = array.unary(|x|
x.wrapping_neg());
- Ok(ColumnarValue::Array(Arc::new(result)))
+ impl_integer_array_negative!(array, Int16Type, "Int16",
enable_ansi_mode)
}
DataType::Int32 => {
- let array = array.as_primitive::<Int32Type>();
- let result: PrimitiveArray<Int32Type> = array.unary(|x|
x.wrapping_neg());
- Ok(ColumnarValue::Array(Arc::new(result)))
+ impl_integer_array_negative!(array, Int32Type, "Int32",
enable_ansi_mode)
}
DataType::Int64 => {
- let array = array.as_primitive::<Int64Type>();
- let result: PrimitiveArray<Int64Type> = array.unary(|x|
x.wrapping_neg());
- Ok(ColumnarValue::Array(Arc::new(result)))
+ impl_integer_array_negative!(array, Int64Type, "Int64",
enable_ansi_mode)
}
// Floating point - simple negation (no overflow possible)
- DataType::Float16 => {
- let array = array.as_primitive::<Float16Type>();
- let result: PrimitiveArray<Float16Type> = array.unary(|x| -x);
- Ok(ColumnarValue::Array(Arc::new(result)))
- }
- DataType::Float32 => {
- let array = array.as_primitive::<Float32Type>();
- let result: PrimitiveArray<Float32Type> = array.unary(|x| -x);
- Ok(ColumnarValue::Array(Arc::new(result)))
- }
- DataType::Float64 => {
- let array = array.as_primitive::<Float64Type>();
- let result: PrimitiveArray<Float64Type> = array.unary(|x| -x);
- Ok(ColumnarValue::Array(Arc::new(result)))
- }
+ DataType::Float16 => impl_float_array_negative!(array,
Float16Type),
+ DataType::Float32 => impl_float_array_negative!(array,
Float32Type),
+ DataType::Float64 => impl_float_array_negative!(array,
Float64Type),
- // Decimal types - wrapping negation
- DataType::Decimal32(_, _) => {
- let array = array.as_primitive::<Decimal32Type>();
- let result: PrimitiveArray<Decimal32Type> =
- array.unary(|x| x.wrapping_neg());
- Ok(ColumnarValue::Array(Arc::new(result)))
- }
- DataType::Decimal64(_, _) => {
- let array = array.as_primitive::<Decimal64Type>();
- let result: PrimitiveArray<Decimal64Type> =
- array.unary(|x| x.wrapping_neg());
- Ok(ColumnarValue::Array(Arc::new(result)))
- }
- DataType::Decimal128(_, _) => {
- let array = array.as_primitive::<Decimal128Type>();
- let result: PrimitiveArray<Decimal128Type> =
- array.unary(|x| x.wrapping_neg());
- Ok(ColumnarValue::Array(Arc::new(result)))
- }
- DataType::Decimal256(_, _) => {
- let array = array.as_primitive::<Decimal256Type>();
- let result: PrimitiveArray<Decimal256Type> =
- array.unary(|x| x.wrapping_neg());
- Ok(ColumnarValue::Array(Arc::new(result)))
- }
+ // Decimal types - use checked negation in ANSI mode, wrapping in
legacy mode
+ DataType::Decimal32(_, _) => impl_decimal_array_negative!(
+ array,
+ Decimal32Type,
+ "Decimal32",
+ enable_ansi_mode
+ ),
+ DataType::Decimal64(_, _) => impl_decimal_array_negative!(
+ array,
+ Decimal64Type,
+ "Decimal64",
+ enable_ansi_mode
+ ),
+ DataType::Decimal128(_, _) => impl_decimal_array_negative!(
+ array,
+ Decimal128Type,
+ "Decimal128",
+ enable_ansi_mode
+ ),
+ DataType::Decimal256(_, _) => impl_decimal_array_negative!(
+ array,
+ Decimal256Type,
+ "Decimal256",
+ enable_ansi_mode
+ ),
- // interval type
+ // interval type - use checked negation in ANSI mode, wrapping in
legacy mode
DataType::Interval(IntervalUnit::YearMonth) => {
- let array = array.as_primitive::<IntervalYearMonthType>();
- let result: PrimitiveArray<IntervalYearMonthType> =
- array.unary(|x| x.wrapping_neg());
- Ok(ColumnarValue::Array(Arc::new(result)))
+ impl_integer_array_negative!(
+ array,
+ IntervalYearMonthType,
+ "IntervalYearMonth",
+ enable_ansi_mode
+ )
}
DataType::Interval(IntervalUnit::DayTime) => {
let array = array.as_primitive::<IntervalDayTimeType>();
- let result: PrimitiveArray<IntervalDayTimeType> =
+ let result: PrimitiveArray<IntervalDayTimeType> = if
enable_ansi_mode {
+ array.try_unary(|x| {
+ let days = x.days.checked_neg().ok_or_else(|| {
+ (exec_err!(
+ "IntervalDayTime overflow on negative (days:
{})",
+ x.days
+ ) as Result<(), _>)
+ .unwrap_err()
+ })?;
+ let milliseconds =
+ x.milliseconds.checked_neg().ok_or_else(|| {
+ (exec_err!(
+ "IntervalDayTime overflow on negative
(milliseconds: {})",
+ x.milliseconds
+ ) as Result<(), _>)
+ .unwrap_err()
+ })?;
+ Ok::<_, arrow::error::ArrowError>(IntervalDayTime {
+ days,
+ milliseconds,
+ })
+ })?
+ } else {
array.unary(|x| IntervalDayTime {
days: x.days.wrapping_neg(),
milliseconds: x.milliseconds.wrapping_neg(),
- });
+ })
+ };
Ok(ColumnarValue::Array(Arc::new(result)))
}
DataType::Interval(IntervalUnit::MonthDayNano) => {
let array = array.as_primitive::<IntervalMonthDayNanoType>();
- let result: PrimitiveArray<IntervalMonthDayNanoType> =
+ let result: PrimitiveArray<IntervalMonthDayNanoType> = if
enable_ansi_mode
+ {
+ array.try_unary(|x| {
+ let months = x.months.checked_neg().ok_or_else(|| {
+ (exec_err!(
+ "IntervalMonthDayNano overflow on negative
(months: {})",
+ x.months
+ ) as Result<(), _>)
+ .unwrap_err()
+ })?;
+ let days = x.days.checked_neg().ok_or_else(|| {
+ (exec_err!(
+ "IntervalMonthDayNano overflow on negative
(days: {})",
+ x.days
+ ) as Result<(), _>)
+ .unwrap_err()
+ })?;
+ let nanoseconds =
x.nanoseconds.checked_neg().ok_or_else(|| {
+ (exec_err!(
+ "IntervalMonthDayNano overflow on negative
(nanoseconds: {})",
+ x.nanoseconds
+ ) as Result<(), _>)
+ .unwrap_err()
+ })?;
+ Ok::<_, arrow::error::ArrowError>(IntervalMonthDayNano
{
+ months,
+ days,
+ nanoseconds,
+ })
+ })?
+ } else {
array.unary(|x| IntervalMonthDayNano {
months: x.months.wrapping_neg(),
days: x.days.wrapping_neg(),
nanoseconds: x.nanoseconds.wrapping_neg(),
- });
+ })
+ };
Ok(ColumnarValue::Array(Arc::new(result)))
}
@@ -206,22 +328,18 @@ fn spark_negative(args: &[ColumnarValue]) ->
Result<ColumnarValue> {
ScalarValue::Null => Ok(arg.clone()),
_ if sv.is_null() => Ok(arg.clone()),
- // Signed integers - wrapping negation
+ // Signed integers - use checked negation in ANSI mode, wrapping
in legacy mode
ScalarValue::Int8(Some(v)) => {
- let result = v.wrapping_neg();
- Ok(ColumnarValue::Scalar(ScalarValue::Int8(Some(result))))
+ impl_integer_scalar_negative!(v, "Int8", Int8,
enable_ansi_mode)
}
ScalarValue::Int16(Some(v)) => {
- let result = v.wrapping_neg();
- Ok(ColumnarValue::Scalar(ScalarValue::Int16(Some(result))))
+ impl_integer_scalar_negative!(v, "Int16", Int16,
enable_ansi_mode)
}
ScalarValue::Int32(Some(v)) => {
- let result = v.wrapping_neg();
- Ok(ColumnarValue::Scalar(ScalarValue::Int32(Some(result))))
+ impl_integer_scalar_negative!(v, "Int32", Int32,
enable_ansi_mode)
}
ScalarValue::Int64(Some(v)) => {
- let result = v.wrapping_neg();
- Ok(ColumnarValue::Scalar(ScalarValue::Int64(Some(result))))
+ impl_integer_scalar_negative!(v, "Int64", Int64,
enable_ansi_mode)
}
// Floating point - simple negation
@@ -235,58 +353,124 @@ fn spark_negative(args: &[ColumnarValue]) ->
Result<ColumnarValue> {
Ok(ColumnarValue::Scalar(ScalarValue::Float64(Some(-v))))
}
- // Decimal types - wrapping negation
+ // Decimal types - use checked negation in ANSI mode, wrapping in
legacy mode
ScalarValue::Decimal32(Some(v), precision, scale) => {
- let result = v.wrapping_neg();
- Ok(ColumnarValue::Scalar(ScalarValue::Decimal32(
- Some(result),
- *precision,
- *scale,
- )))
+ impl_decimal_scalar_negative!(
+ v,
+ precision,
+ scale,
+ "Decimal32",
+ Decimal32,
+ enable_ansi_mode
+ )
}
ScalarValue::Decimal64(Some(v), precision, scale) => {
- let result = v.wrapping_neg();
- Ok(ColumnarValue::Scalar(ScalarValue::Decimal64(
- Some(result),
- *precision,
- *scale,
- )))
+ impl_decimal_scalar_negative!(
+ v,
+ precision,
+ scale,
+ "Decimal64",
+ Decimal64,
+ enable_ansi_mode
+ )
}
ScalarValue::Decimal128(Some(v), precision, scale) => {
- let result = v.wrapping_neg();
- Ok(ColumnarValue::Scalar(ScalarValue::Decimal128(
- Some(result),
- *precision,
- *scale,
- )))
+ impl_decimal_scalar_negative!(
+ v,
+ precision,
+ scale,
+ "Decimal128",
+ Decimal128,
+ enable_ansi_mode
+ )
}
ScalarValue::Decimal256(Some(v), precision, scale) => {
- let result = v.wrapping_neg();
- Ok(ColumnarValue::Scalar(ScalarValue::Decimal256(
+ impl_decimal_scalar_negative!(
+ v,
+ precision,
+ scale,
+ "Decimal256",
+ Decimal256,
+ enable_ansi_mode
+ )
+ }
+
+ //interval type - use checked negation in ANSI mode, wrapping in
legacy mode
+ ScalarValue::IntervalYearMonth(Some(v)) => {
+ impl_integer_scalar_negative!(
+ v,
+ "IntervalYearMonth",
+ IntervalYearMonth,
+ enable_ansi_mode
+ )
+ }
+ ScalarValue::IntervalDayTime(Some(v)) => {
+ let result = if enable_ansi_mode {
+ let days = v.days.checked_neg().ok_or_else(|| {
+ (exec_err!(
+ "IntervalDayTime overflow on negative (days: {})",
+ v.days
+ ) as Result<(), _>)
+ .unwrap_err()
+ })?;
+ let milliseconds =
v.milliseconds.checked_neg().ok_or_else(|| {
+ (exec_err!(
+ "IntervalDayTime overflow on negative
(milliseconds: {})",
+ v.milliseconds
+ ) as Result<(), _>)
+ .unwrap_err()
+ })?;
+ IntervalDayTime { days, milliseconds }
+ } else {
+ IntervalDayTime {
+ days: v.days.wrapping_neg(),
+ milliseconds: v.milliseconds.wrapping_neg(),
+ }
+ };
+ Ok(ColumnarValue::Scalar(ScalarValue::IntervalDayTime(Some(
+ result,
+ ))))
+ }
+ ScalarValue::IntervalMonthDayNano(Some(v)) => {
+ let result = if enable_ansi_mode {
+ let months = v.months.checked_neg().ok_or_else(|| {
+ (exec_err!(
+ "IntervalMonthDayNano overflow on negative
(months: {})",
+ v.months
+ ) as Result<(), _>)
+ .unwrap_err()
+ })?;
+ let days = v.days.checked_neg().ok_or_else(|| {
+ (exec_err!(
+ "IntervalMonthDayNano overflow on negative (days:
{})",
+ v.days
+ ) as Result<(), _>)
+ .unwrap_err()
+ })?;
+ let nanoseconds =
v.nanoseconds.checked_neg().ok_or_else(|| {
+ (exec_err!(
+ "IntervalMonthDayNano overflow on negative
(nanoseconds: {})",
+ v.nanoseconds
+ ) as Result<(), _>)
+ .unwrap_err()
+ })?;
+ IntervalMonthDayNano {
+ months,
+ days,
+ nanoseconds,
+ }
+ } else {
+ IntervalMonthDayNano {
+ months: v.months.wrapping_neg(),
+ days: v.days.wrapping_neg(),
+ nanoseconds: v.nanoseconds.wrapping_neg(),
+ }
+ };
+ Ok(ColumnarValue::Scalar(ScalarValue::IntervalMonthDayNano(
Some(result),
- *precision,
- *scale,
)))
}
- //interval type
- ScalarValue::IntervalYearMonth(Some(v)) =>
Ok(ColumnarValue::Scalar(
- ScalarValue::IntervalYearMonth(Some(v.wrapping_neg())),
- )),
- ScalarValue::IntervalDayTime(Some(v)) => Ok(ColumnarValue::Scalar(
- ScalarValue::IntervalDayTime(Some(IntervalDayTime {
- days: v.days.wrapping_neg(),
- milliseconds: v.milliseconds.wrapping_neg(),
- })),
- )),
- ScalarValue::IntervalMonthDayNano(Some(v)) =>
Ok(ColumnarValue::Scalar(
- ScalarValue::IntervalMonthDayNano(Some(IntervalMonthDayNano {
- months: v.months.wrapping_neg(),
- days: v.days.wrapping_neg(),
- nanoseconds: v.nanoseconds.wrapping_neg(),
- })),
- )),
-
dt => not_impl_err!("Not supported datatype for Spark negative():
{dt}"),
},
}
diff --git a/datafusion/sqllogictest/test_files/spark/math/negative.slt
b/datafusion/sqllogictest/test_files/spark/math/negative.slt
index c62267e496..40bfaf791f 100644
--- a/datafusion/sqllogictest/test_files/spark/math/negative.slt
+++ b/datafusion/sqllogictest/test_files/spark/math/negative.slt
@@ -277,3 +277,55 @@ SELECT negative(column1) FROM interval_test;
statement ok
DROP TABLE interval_test;
+
+## ANSI mode tests: overflow detection
+statement ok
+set datafusion.execution.enable_ansi_mode = true;
+
+# Test ANSI mode: negative of minimum values should error (overflow)
+query error DataFusion error: Execution error: Int8 overflow on
negative\(\-128\)
+SELECT negative((-128)::tinyint);
+
+query error DataFusion error: Execution error: Int16 overflow on
negative\(\-32768\)
+SELECT negative((-32768)::smallint);
+
+query error DataFusion error: Execution error: Int32 overflow on
negative\(\-2147483648\)
+SELECT negative((-2147483648)::int);
+
+query error DataFusion error: Execution error: Int64 overflow on
negative\(\-9223372036854775808\)
+SELECT negative((-9223372036854775808)::bigint);
+
+# Test ANSI mode: negative of (MIN+1) should succeed (boundary test)
+query I
+SELECT negative((-127)::tinyint);
+----
+127
+
+query I
+SELECT negative((-32767)::smallint);
+----
+32767
+
+query I
+SELECT negative((-2147483647)::int);
+----
+2147483647
+
+query I
+SELECT negative((-9223372036854775807)::bigint);
+----
+9223372036854775807
+
+# Test ANSI mode: array with MIN value should error
+statement ok
+CREATE TABLE min_values_ansi AS VALUES (-2147483648);
+
+query error DataFusion error: Execution error: Int32 overflow on
negative\(\-2147483648\)
+SELECT negative(column1::int) FROM min_values_ansi;
+
+statement ok
+DROP TABLE min_values_ansi;
+
+# Reset ANSI mode to false
+statement ok
+set datafusion.execution.enable_ansi_mode = false;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]