This is an automated email from the ASF dual-hosted git repository.

agrove pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git


The following commit(s) were added to refs/heads/main by this push:
     new 7465fe312 fix: Fix `space()` with negative input (#3347)
7465fe312 is described below

commit 7465fe3126ff64075dc64bd63c8b9eb7598b568f
Author: hsiang-c <[email protected]>
AuthorDate: Fri Jan 30 10:42:08 2026 -0800

    fix: Fix `space()` with negative input (#3347)
    
    * fix: handle negative lengths in string_space expression
    
    The string_space function previously failed with overflow when given
    negative input values. Now treats negative lengths as zero, returning
    empty strings.
    
    Co-Authored-By: Claude Sonnet 4.5 <[email protected]>
    
    * Fix a typo
    
    * Call max() once; more comments
    
    ---------
    
    Co-authored-by: Claude Sonnet 4.5 <[email protected]>
---
 native/spark-expr/src/string_funcs/string_space.rs | 32 ++++++++++++++++++++--
 native/spark-expr/src/string_funcs/substring.rs    |  2 +-
 .../apache/comet/CometStringExpressionSuite.scala  |  2 +-
 3 files changed, 31 insertions(+), 5 deletions(-)

diff --git a/native/spark-expr/src/string_funcs/string_space.rs 
b/native/spark-expr/src/string_funcs/string_space.rs
index 00b880730..4ab536279 100644
--- a/native/spark-expr/src/string_funcs/string_space.rs
+++ b/native/spark-expr/src/string_funcs/string_space.rs
@@ -119,9 +119,14 @@ fn generic_string_space<OffsetSize: 
OffsetSizeTrait>(length: &Int32Array) -> Arr
     let null_bit_buffer = length.to_data().nulls().map(|b| b.buffer().clone());
 
     // Gets slice of length array to access it directly for performance.
+    // Negative length values are set to zero to match Spark behavior
     let length_data = length.to_data();
-    let lengths = length_data.buffers()[0].typed_data::<i32>();
-    let total = lengths.iter().map(|l| *l as usize).sum::<usize>();
+    let lengths: Vec<_> = length_data.buffers()[0]
+        .typed_data::<i32>()
+        .iter()
+        .map(|l| (*l).max(0) as usize)
+        .collect();
+    let total = lengths.iter().sum::<usize>();
     let mut values = MutableBuffer::new(total);
 
     offsets.push(length_so_far);
@@ -130,7 +135,7 @@ fn generic_string_space<OffsetSize: 
OffsetSizeTrait>(length: &Int32Array) -> Arr
     values.resize(total, blank);
 
     (0..array_len).for_each(|i| {
-        let current_len = lengths[i] as usize;
+        let current_len = lengths[i];
 
         length_so_far += OffsetSize::from_usize(current_len).unwrap();
         offsets.push(length_so_far);
@@ -149,3 +154,24 @@ fn generic_string_space<OffsetSize: 
OffsetSizeTrait>(length: &Int32Array) -> Arr
     };
     make_array(data)
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use arrow::array::StringArray;
+    use datafusion::common::cast::as_string_array;
+
+    #[test]
+    fn test_negative_length() {
+        let input = Int32Array::from(vec![Some(-1), Some(-2), None]);
+        let args = ColumnarValue::Array(Arc::new(input));
+        match spark_string_space(&[args]) {
+            Ok(ColumnarValue::Array(result)) => {
+                let actual = as_string_array(&result).unwrap();
+                let expected = StringArray::from(vec![Some(""), Some(""), 
None]);
+                assert_eq!(actual, &expected)
+            }
+            _ => unreachable!(),
+        }
+    }
+}
diff --git a/native/spark-expr/src/string_funcs/substring.rs 
b/native/spark-expr/src/string_funcs/substring.rs
index f0ecb4e7e..5037a6e06 100644
--- a/native/spark-expr/src/string_funcs/substring.rs
+++ b/native/spark-expr/src/string_funcs/substring.rs
@@ -61,7 +61,7 @@ impl Display for SubstringExpr {
     fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
         write!(
             f,
-            "StringSpace [start: {}, len: {}, child: {}]",
+            "Substring [start: {}, len: {}, child: {}]",
             self.start, self.len, self.child
         )
     }
diff --git 
a/spark/src/test/scala/org/apache/comet/CometStringExpressionSuite.scala 
b/spark/src/test/scala/org/apache/comet/CometStringExpressionSuite.scala
index f9882780c..662534b3c 100644
--- a/spark/src/test/scala/org/apache/comet/CometStringExpressionSuite.scala
+++ b/spark/src/test/scala/org/apache/comet/CometStringExpressionSuite.scala
@@ -376,7 +376,7 @@ class CometStringExpressionSuite extends CometTestBase {
   }
 
   test("string_space") {
-    withParquetTable((0 until 5).map(i => (i, i + 1)), "tbl") {
+    withParquetTable((0 until 5).map(i => (-i, i + 1)), "tbl") {
       checkSparkAnswerAndOperator("SELECT space(_1), space(_2) FROM tbl")
     }
   }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to