This is an automated email from the ASF dual-hosted git repository.

scovich pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new e4b68e6f82 Add `append_non_nulls` to `StructBuilder` (#9430)
e4b68e6f82 is described below

commit e4b68e6f82e41d3f06182e39723183c28e47afa4
Author: Fokko Driesprong <[email protected]>
AuthorDate: Mon Mar 2 22:51:19 2026 +0100

    Add `append_non_nulls` to `StructBuilder` (#9430)
    
    # Which issue does this PR close?
    
    - Closes #9429
    
    While doing some performance optimization, I noticed that we have a loop
    appending one value to the null mask at a time. Instead, I suggest adding
    `append_non_nulls` to append all `n` non-null entries in a single call.
    
    ```
    append_non_nulls(n) vs append(true) in a loop (with bitmap allocated)
    
    ┌───────────┬───────────────────┬─────────────────────┬─────────┐
    │     n     │ append(true) loop │ append_non_nulls(n) │ speedup │
    ├───────────┼───────────────────┼─────────────────────┼─────────┤
    │ 100       │ 251 ns            │ 73 ns               │ ~3x     │
    ├───────────┼───────────────────┼─────────────────────┼─────────┤
    │ 1,000     │ 2.0 µs            │ 94 ns               │ ~21x    │
    ├───────────┼───────────────────┼─────────────────────┼─────────┤
    │ 10,000    │ 19.3 µs           │ 119 ns              │ ~162x   │
    ├───────────┼───────────────────┼─────────────────────┼─────────┤
    │ 100,000   │ 191 µs            │ 348 ns              │ ~549x   │
    ├───────────┼───────────────────┼─────────────────────┼─────────┤
    │ 1,000,000 │ 1.90 ms           │ 3.5 µs              │ ~543x   │
    └───────────┴───────────────────┴─────────────────────┴─────────┘
    ```
    
    
    # Rationale for this change
    
    It adds a new public API to improve performance.
    
    # What changes are included in this PR?
    
    A new public API
    
    # Are these changes tested?
    
    Yes, with new unit tests.
    
    # Are there any user-facing changes?
    
    Just a new convenient API.
---
 arrow-array/src/builder/struct_builder.rs | 62 +++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)

diff --git a/arrow-array/src/builder/struct_builder.rs b/arrow-array/src/builder/struct_builder.rs
index ad58e00857..795593c98a 100644
--- a/arrow-array/src/builder/struct_builder.rs
+++ b/arrow-array/src/builder/struct_builder.rs
@@ -213,6 +213,12 @@ impl StructBuilder {
         self.null_buffer_builder.append(is_valid);
     }
 
+    /// Appends `n` non-null entries into the builder.
+    #[inline]
+    pub fn append_non_nulls(&mut self, n: usize) {
+        self.null_buffer_builder.append_n_non_nulls(n);
+    }
+
     /// Appends a null element to the struct.
     #[inline]
     pub fn append_null(&mut self) {
@@ -727,4 +733,60 @@ mod tests {
         assert!(a1.is_valid(0));
         assert!(a1.is_null(1));
     }
+
+    #[test]
+    fn test_append_non_nulls() {
+        let int_builder = Int32Builder::new();
+        let fields = vec![Field::new("f1", DataType::Int32, false)];
+        let field_builders = vec![Box::new(int_builder) as Box<dyn 
ArrayBuilder>];
+
+        let mut builder = StructBuilder::new(fields, field_builders);
+        builder
+            .field_builder::<Int32Builder>(0)
+            .unwrap()
+            .append_slice(&[1, 2, 3, 4, 5]);
+        builder.append_non_nulls(5);
+
+        let arr = builder.finish();
+        assert_eq!(arr.len(), 5);
+        assert_eq!(arr.null_count(), 0);
+        for i in 0..5 {
+            assert!(arr.is_valid(i));
+        }
+    }
+
+    #[test]
+    fn test_append_non_nulls_with_nulls() {
+        let mut builder = StructBuilder::new(Fields::empty(), vec![]);
+        builder.append_null();
+        builder.append_non_nulls(3);
+        builder.append_nulls(2);
+        builder.append_non_nulls(1);
+
+        let arr = builder.finish();
+        assert_eq!(arr.len(), 7);
+        assert_eq!(arr.null_count(), 3);
+        assert!(arr.is_null(0));
+        assert!(arr.is_valid(1));
+        assert!(arr.is_valid(2));
+        assert!(arr.is_valid(3));
+        assert!(arr.is_null(4));
+        assert!(arr.is_null(5));
+        assert!(arr.is_valid(6));
+    }
+
+    #[test]
+    fn test_append_non_nulls_zero() {
+        let mut builder = StructBuilder::new(Fields::empty(), vec![]);
+        builder.append_non_nulls(0);
+        assert_eq!(builder.len(), 0);
+
+        builder.append(true);
+        builder.append_non_nulls(0);
+        assert_eq!(builder.len(), 1);
+
+        let arr = builder.finish();
+        assert_eq!(arr.len(), 1);
+        assert_eq!(arr.null_count(), 0);
+    }
 }

Reply via email to