This is an automated email from the ASF dual-hosted git repository.
scovich pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new e4b68e6f82 Add `append_non_nulls` to `StructBuilder` (#9430)
e4b68e6f82 is described below
commit e4b68e6f82e41d3f06182e39723183c28e47afa4
Author: Fokko Driesprong <[email protected]>
AuthorDate: Mon Mar 2 22:51:19 2026 +0100
Add `append_non_nulls` to `StructBuilder` (#9430)
# Which issue does this PR close?
- Closes #9429
I'm doing some performance optimization, and noticed that we have a loop
adding one value to the null mask at a time. Instead, I'd suggest adding
`append_non_nulls` to do this at once.
```
append_non_nulls(n) vs append(true) in a loop (with bitmap allocated)
┌───────────┬───────────────────┬─────────────────────┬─────────┐
│ n │ append(true) loop │ append_non_nulls(n) │ speedup │
├───────────┼───────────────────┼─────────────────────┼─────────┤
│ 100 │ 251 ns │ 73 ns │ ~3x │
├───────────┼───────────────────┼─────────────────────┼─────────┤
│ 1,000 │ 2.0 µs │ 94 ns │ ~21x │
├───────────┼───────────────────┼─────────────────────┼─────────┤
│ 10,000 │ 19.3 µs │ 119 ns │ ~162x │
├───────────┼───────────────────┼─────────────────────┼─────────┤
│ 100,000 │ 191 µs │ 348 ns │ ~549x │
├───────────┼───────────────────┼─────────────────────┼─────────┤
│ 1,000,000 │ 1.90 ms │ 3.5 µs │ ~543x │
└───────────┴───────────────────┴─────────────────────┴─────────┘
```
# Rationale for this change
It adds a new public API that improves performance by appending many non-null entries to the null mask in a single call instead of one at a time.
# What changes are included in this PR?
A new public API: `StructBuilder::append_non_nulls`.
# Are these changes tested?
Yes, with new unit tests.
# Are there any user-facing changes?
Just a new convenient API.
---
arrow-array/src/builder/struct_builder.rs | 62 +++++++++++++++++++++++++++++++
1 file changed, 62 insertions(+)
diff --git a/arrow-array/src/builder/struct_builder.rs b/arrow-array/src/builder/struct_builder.rs
index ad58e00857..795593c98a 100644
--- a/arrow-array/src/builder/struct_builder.rs
+++ b/arrow-array/src/builder/struct_builder.rs
@@ -213,6 +213,12 @@ impl StructBuilder {
self.null_buffer_builder.append(is_valid);
}
+ /// Appends `n` non-null entries into the builder.
+ #[inline]
+ pub fn append_non_nulls(&mut self, n: usize) {
+ self.null_buffer_builder.append_n_non_nulls(n);
+ }
+
/// Appends a null element to the struct.
#[inline]
pub fn append_null(&mut self) {
@@ -727,4 +733,60 @@ mod tests {
assert!(a1.is_valid(0));
assert!(a1.is_null(1));
}
+
+ #[test]
+ fn test_append_non_nulls() {
+ let int_builder = Int32Builder::new();
+ let fields = vec![Field::new("f1", DataType::Int32, false)];
+ let field_builders = vec![Box::new(int_builder) as Box<dyn ArrayBuilder>];
+
+ let mut builder = StructBuilder::new(fields, field_builders);
+ builder
+ .field_builder::<Int32Builder>(0)
+ .unwrap()
+ .append_slice(&[1, 2, 3, 4, 5]);
+ builder.append_non_nulls(5);
+
+ let arr = builder.finish();
+ assert_eq!(arr.len(), 5);
+ assert_eq!(arr.null_count(), 0);
+ for i in 0..5 {
+ assert!(arr.is_valid(i));
+ }
+ }
+
+ #[test]
+ fn test_append_non_nulls_with_nulls() {
+ let mut builder = StructBuilder::new(Fields::empty(), vec![]);
+ builder.append_null();
+ builder.append_non_nulls(3);
+ builder.append_nulls(2);
+ builder.append_non_nulls(1);
+
+ let arr = builder.finish();
+ assert_eq!(arr.len(), 7);
+ assert_eq!(arr.null_count(), 3);
+ assert!(arr.is_null(0));
+ assert!(arr.is_valid(1));
+ assert!(arr.is_valid(2));
+ assert!(arr.is_valid(3));
+ assert!(arr.is_null(4));
+ assert!(arr.is_null(5));
+ assert!(arr.is_valid(6));
+ }
+
+ #[test]
+ fn test_append_non_nulls_zero() {
+ let mut builder = StructBuilder::new(Fields::empty(), vec![]);
+ builder.append_non_nulls(0);
+ assert_eq!(builder.len(), 0);
+
+ builder.append(true);
+ builder.append_non_nulls(0);
+ assert_eq!(builder.len(), 1);
+
+ let arr = builder.finish();
+ assert_eq!(arr.len(), 1);
+ assert_eq!(arr.null_count(), 0);
+ }
}