This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new b2cf02c7a Ignore null buffer when creating ArrayData if null count is zero (#2056)
b2cf02c7a is described below

commit b2cf02c7a8a5027d037fc359323bc0ed45b943de
Author: Jörn Horstmann <[email protected]>
AuthorDate: Tue Jul 19 23:24:30 2022 +0200

    Ignore null buffer when creating ArrayData if null count is zero (#2056)
    
    * Avoid creating null buffer for BooleanArray if null count is zero
    
    * Clippy fix
    
    * Check null_count in ArrayData::new_unchecked and ignore null_bit_buffer if there are no null values
---
 arrow/src/array/array_boolean.rs   | 31 +++++++++++++++++++++++++------
 arrow/src/array/array_primitive.rs |  8 ++------
 arrow/src/array/data.rs            | 10 ++++++++--
 3 files changed, 35 insertions(+), 14 deletions(-)

diff --git a/arrow/src/array/array_boolean.rs b/arrow/src/array/array_boolean.rs
index 279db3253..e38a15f8e 100644
--- a/arrow/src/array/array_boolean.rs
+++ b/arrow/src/array/array_boolean.rs
@@ -227,12 +227,12 @@ impl<Ptr: Borrow<Option<bool>>> FromIterator<Ptr> for BooleanArray {
         let data_len = data_len.expect("Iterator must be sized"); // panic if no upper bound.
 
         let num_bytes = bit_util::ceil(data_len, 8);
-        let mut null_buf = MutableBuffer::from_len_zeroed(num_bytes);
-        let mut val_buf = MutableBuffer::from_len_zeroed(num_bytes);
+        let mut null_builder = MutableBuffer::from_len_zeroed(num_bytes);
+        let mut val_builder = MutableBuffer::from_len_zeroed(num_bytes);
 
-        let data = val_buf.as_slice_mut();
+        let data = val_builder.as_slice_mut();
 
-        let null_slice = null_buf.as_slice_mut();
+        let null_slice = null_builder.as_slice_mut();
         iter.enumerate().for_each(|(i, item)| {
             if let Some(a) = item.borrow() {
                 bit_util::set_bit(null_slice, i);
@@ -247,9 +247,9 @@ impl<Ptr: Borrow<Option<bool>>> FromIterator<Ptr> for BooleanArray {
                 DataType::Boolean,
                 data_len,
                 None,
-                Some(null_buf.into()),
+                Some(null_builder.into()),
                 0,
-                vec![val_buf.into()],
+                vec![val_builder.into()],
                 vec![],
             )
         };
@@ -328,6 +328,7 @@ mod tests {
         assert_eq!(4, arr.len());
         assert_eq!(0, arr.offset());
         assert_eq!(0, arr.null_count());
+        assert!(arr.data().null_buffer().is_none());
         for i in 0..3 {
             assert!(!arr.is_null(i));
             assert!(arr.is_valid(i));
@@ -335,6 +336,24 @@ mod tests {
         }
     }
 
+    #[test]
+    fn test_boolean_array_from_nullable_iter() {
+        let v = vec![Some(true), None, Some(false), None];
+        let arr = v.into_iter().collect::<BooleanArray>();
+        assert_eq!(4, arr.len());
+        assert_eq!(0, arr.offset());
+        assert_eq!(2, arr.null_count());
+        assert!(arr.data().null_buffer().is_some());
+
+        assert!(arr.is_valid(0));
+        assert!(arr.is_null(1));
+        assert!(arr.is_valid(2));
+        assert!(arr.is_null(3));
+
+        assert!(arr.value(0));
+        assert!(!arr.value(2));
+    }
+
     #[test]
     fn test_boolean_array_builder() {
         // Test building a boolean array with ArrayData builder and offset
diff --git a/arrow/src/array/array_primitive.rs b/arrow/src/array/array_primitive.rs
index efac5a60c..5d25412dc 100644
--- a/arrow/src/array/array_primitive.rs
+++ b/arrow/src/array/array_primitive.rs
@@ -425,17 +425,13 @@ impl<T: ArrowPrimitiveType, Ptr: Into<NativeAdapter<T>>> FromIterator<Ptr>
             .collect();
 
         let len = null_builder.len();
-        let null_buf: Buffer = null_builder.into();
-        let valid_count = null_buf.count_set_bits();
-        let null_count = len - valid_count;
-        let opt_null_buf = (null_count != 0).then(|| null_buf);
 
         let data = unsafe {
             ArrayData::new_unchecked(
                 T::DATA_TYPE,
                 len,
-                Some(null_count),
-                opt_null_buf,
+                None,
+                Some(null_builder.into()),
                 0,
                 vec![buffer],
                 vec![],
diff --git a/arrow/src/array/data.rs b/arrow/src/array/data.rs
index 5c7bd69d8..0bb503e94 100644
--- a/arrow/src/array/data.rs
+++ b/arrow/src/array/data.rs
@@ -267,7 +267,10 @@ impl ArrayData {
     /// Create a new ArrayData instance;
     ///
     /// If `null_count` is not specified, the number of nulls in
-    /// null_bit_buffer is calculated
+    /// null_bit_buffer is calculated.
+    ///
+    /// If the number of nulls is 0 then the null_bit_buffer
+    /// is set to `None`.
     ///
     /// # Safety
     ///
@@ -291,7 +294,7 @@ impl ArrayData {
             None => count_nulls(null_bit_buffer.as_ref(), offset, len),
             Some(null_count) => null_count,
         };
-        let null_bitmap = null_bit_buffer.map(Bitmap::from);
+        let null_bitmap = null_bit_buffer.filter(|_| null_count > 0).map(Bitmap::from);
         let new_self = Self {
             data_type,
             len,
@@ -311,6 +314,9 @@ impl ArrayData {
     /// Create a new ArrayData, validating that the provided buffers
     /// form a valid Arrow array of the specified data type.
     ///
+    /// If the number of nulls in `null_bit_buffer` is 0 then the null_bit_buffer
+    /// is set to `None`.
+    ///
     /// Note: This is a low level API and most users of the arrow
     /// crate should create arrays using the methods in the `array`
     /// module.

Reply via email to