This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new b2cf02c7a Ignore null buffer when creating ArrayData if null count is
zero (#2056)
b2cf02c7a is described below
commit b2cf02c7a8a5027d037fc359323bc0ed45b943de
Author: Jörn Horstmann <[email protected]>
AuthorDate: Tue Jul 19 23:24:30 2022 +0200
Ignore null buffer when creating ArrayData if null count is zero (#2056)
* Avoid creating null buffer for BooleanArray if null count is zero
* Clippy fix
* Check null_count in ArrayData::new_unchecked and ignore null_bit_buffer
if there are no null values
---
arrow/src/array/array_boolean.rs | 31 +++++++++++++++++++++++++------
arrow/src/array/array_primitive.rs | 8 ++------
arrow/src/array/data.rs | 10 ++++++++--
3 files changed, 35 insertions(+), 14 deletions(-)
diff --git a/arrow/src/array/array_boolean.rs b/arrow/src/array/array_boolean.rs
index 279db3253..e38a15f8e 100644
--- a/arrow/src/array/array_boolean.rs
+++ b/arrow/src/array/array_boolean.rs
@@ -227,12 +227,12 @@ impl<Ptr: Borrow<Option<bool>>> FromIterator<Ptr> for BooleanArray {
let data_len = data_len.expect("Iterator must be sized"); // panic if no upper bound.
let num_bytes = bit_util::ceil(data_len, 8);
- let mut null_buf = MutableBuffer::from_len_zeroed(num_bytes);
- let mut val_buf = MutableBuffer::from_len_zeroed(num_bytes);
+ let mut null_builder = MutableBuffer::from_len_zeroed(num_bytes);
+ let mut val_builder = MutableBuffer::from_len_zeroed(num_bytes);
- let data = val_buf.as_slice_mut();
+ let data = val_builder.as_slice_mut();
- let null_slice = null_buf.as_slice_mut();
+ let null_slice = null_builder.as_slice_mut();
iter.enumerate().for_each(|(i, item)| {
if let Some(a) = item.borrow() {
bit_util::set_bit(null_slice, i);
@@ -247,9 +247,9 @@ impl<Ptr: Borrow<Option<bool>>> FromIterator<Ptr> for BooleanArray {
DataType::Boolean,
data_len,
None,
- Some(null_buf.into()),
+ Some(null_builder.into()),
0,
- vec![val_buf.into()],
+ vec![val_builder.into()],
vec![],
)
};
@@ -328,6 +328,7 @@ mod tests {
assert_eq!(4, arr.len());
assert_eq!(0, arr.offset());
assert_eq!(0, arr.null_count());
+ assert!(arr.data().null_buffer().is_none());
for i in 0..3 {
assert!(!arr.is_null(i));
assert!(arr.is_valid(i));
@@ -335,6 +336,24 @@ mod tests {
}
}
+ #[test]
+ fn test_boolean_array_from_nullable_iter() {
+ let v = vec![Some(true), None, Some(false), None];
+ let arr = v.into_iter().collect::<BooleanArray>();
+ assert_eq!(4, arr.len());
+ assert_eq!(0, arr.offset());
+ assert_eq!(2, arr.null_count());
+ assert!(arr.data().null_buffer().is_some());
+
+ assert!(arr.is_valid(0));
+ assert!(arr.is_null(1));
+ assert!(arr.is_valid(2));
+ assert!(arr.is_null(3));
+
+ assert!(arr.value(0));
+ assert!(!arr.value(2));
+ }
+
#[test]
fn test_boolean_array_builder() {
// Test building a boolean array with ArrayData builder and offset
diff --git a/arrow/src/array/array_primitive.rs b/arrow/src/array/array_primitive.rs
index efac5a60c..5d25412dc 100644
--- a/arrow/src/array/array_primitive.rs
+++ b/arrow/src/array/array_primitive.rs
@@ -425,17 +425,13 @@ impl<T: ArrowPrimitiveType, Ptr: Into<NativeAdapter<T>>> FromIterator<Ptr>
.collect();
let len = null_builder.len();
- let null_buf: Buffer = null_builder.into();
- let valid_count = null_buf.count_set_bits();
- let null_count = len - valid_count;
- let opt_null_buf = (null_count != 0).then(|| null_buf);
let data = unsafe {
ArrayData::new_unchecked(
T::DATA_TYPE,
len,
- Some(null_count),
- opt_null_buf,
+ None,
+ Some(null_builder.into()),
0,
vec![buffer],
vec![],
diff --git a/arrow/src/array/data.rs b/arrow/src/array/data.rs
index 5c7bd69d8..0bb503e94 100644
--- a/arrow/src/array/data.rs
+++ b/arrow/src/array/data.rs
@@ -267,7 +267,10 @@ impl ArrayData {
/// Create a new ArrayData instance;
///
/// If `null_count` is not specified, the number of nulls in
- /// null_bit_buffer is calculated
+ /// null_bit_buffer is calculated.
+ ///
+ /// If the number of nulls is 0 then the null_bit_buffer
+ /// is set to `None`.
///
/// # Safety
///
@@ -291,7 +294,7 @@ impl ArrayData {
None => count_nulls(null_bit_buffer.as_ref(), offset, len),
Some(null_count) => null_count,
};
- let null_bitmap = null_bit_buffer.map(Bitmap::from);
+ let null_bitmap = null_bit_buffer.filter(|_| null_count > 0).map(Bitmap::from);
let new_self = Self {
data_type,
len,
@@ -311,6 +314,9 @@ impl ArrayData {
/// Create a new ArrayData, validating that the provided buffers
/// form a valid Arrow array of the specified data type.
///
+ /// If the number of nulls in `null_bit_buffer` is 0 then the null_bit_buffer
+ /// is set to `None`.
+ ///
/// Note: This is a low level API and most users of the arrow
/// crate should create arrays using the methods in the `array`
/// module.