This is an automated email from the ASF dual-hosted git repository.

dheres pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 19889a33f6 Use chunks_exact for has_true/has_false to enable compiler unrolling (#9570)
19889a33f6 is described below

commit 19889a33f63427c4b22ab3b7fcb62b77dbe9ddec
Author: Adrian Garcia Badaracco <[email protected]>
AuthorDate: Wed Mar 18 02:58:01 2026 -0500

    Use chunks_exact for has_true/has_false to enable compiler unrolling (#9570)
    
    ## Summary
    - Replace `.chunks(64)` with `.chunks_exact(16)` in `has_true()` and
    `has_false()`, as suggested in
    https://github.com/apache/arrow-rs/pull/9511#discussion_r2950942579
    - With `chunks_exact`, every block has a guaranteed length, so the
    compiler can fully unroll the inner fold (no inner branch/loop). This
    allows a smaller block size, giving more frequent short-circuit exits
    without regressing the full-scan path
    - Chunks left over after the last full block are checked individually
    via `remainder()`
    
    ## Benchmark results (block size 16 with `chunks_exact` vs. baseline block size 64 with `chunks`)
    - Full-scan worst case (65536): no regression (~49 ns for both)
    - Early-exit cases (65536): ~27% faster (6.0 ns → 4.4 ns)
    - Small arrays (64, 1024): unchanged
    
    ## Test plan
    - [x] All 13 existing `test_has` tests pass
    
    run benchmarks boolean_array
    
    @DanDanDan Would appreciate your review!
    
    🤖 Generated with [Claude Code](https://claude.com/claude-code)
    
    Co-authored-by: Claude Opus 4.6 (1M context) <[email protected]>
---
 arrow-array/src/array/boolean_array.rs | 29 ++++++++++++++++-------------
 1 file changed, 16 insertions(+), 13 deletions(-)

diff --git a/arrow-array/src/array/boolean_array.rs b/arrow-array/src/array/boolean_array.rs
index 1a2dd986ad..ee3413e183 100644
--- a/arrow-array/src/array/boolean_array.rs
+++ b/arrow-array/src/array/boolean_array.rs
@@ -158,9 +158,9 @@ impl BooleanArray {
     }
 
     /// Block size for chunked fold operations in [`Self::has_true`] and 
[`Self::has_false`].
-    /// Folding this many u64 chunks at a time allows the compiler to 
autovectorize
-    /// the inner loop while still enabling short-circuit exits.
-    const CHUNK_FOLD_BLOCK_SIZE: usize = 64;
+    /// Using `chunks_exact` with this size lets the compiler fully unroll the 
inner
+    /// fold (no inner branch/loop), enabling short-circuit exits every N 
chunks.
+    const CHUNK_FOLD_BLOCK_SIZE: usize = 16;
 
     /// Returns an [`UnalignedBitChunk`] over this array's values.
     fn unaligned_bit_chunks(&self) -> UnalignedBitChunk<'_> {
@@ -204,11 +204,12 @@ impl BooleanArray {
             }
             None => {
                 let bit_chunks = self.unaligned_bit_chunks();
-                bit_chunks.prefix().unwrap_or(0) != 0
-                    || bit_chunks
-                        .chunks()
-                        .chunks(Self::CHUNK_FOLD_BLOCK_SIZE)
-                        .any(|block| block.iter().fold(0u64, |acc, &c| acc | 
c) != 0)
+                let chunks = bit_chunks.chunks();
+                let mut exact = 
chunks.chunks_exact(Self::CHUNK_FOLD_BLOCK_SIZE);
+                let found = bit_chunks.prefix().unwrap_or(0) != 0
+                    || exact.any(|block| block.iter().fold(0u64, |acc, &c| acc 
| c) != 0);
+                found
+                    || exact.remainder().iter().any(|&c| c != 0)
                     || bit_chunks.suffix().unwrap_or(0) != 0
             }
         }
@@ -243,13 +244,15 @@ impl BooleanArray {
                     (None, Some(_)) => (0, !trail_mask),
                     (None, None) => (0, 0),
                 };
-                bit_chunks
+                let chunks = bit_chunks.chunks();
+                let mut exact = 
chunks.chunks_exact(Self::CHUNK_FOLD_BLOCK_SIZE);
+                let found = bit_chunks
                     .prefix()
                     .is_some_and(|v| (v | prefix_fill) != u64::MAX)
-                    || bit_chunks
-                        .chunks()
-                        .chunks(Self::CHUNK_FOLD_BLOCK_SIZE)
-                        .any(|block| block.iter().fold(u64::MAX, |acc, &c| acc 
& c) != u64::MAX)
+                    || exact
+                        .any(|block| block.iter().fold(u64::MAX, |acc, &c| acc 
& c) != u64::MAX);
+                found
+                    || exact.remainder().iter().any(|&c| c != u64::MAX)
                     || bit_chunks
                         .suffix()
                         .is_some_and(|v| (v | suffix_fill) != u64::MAX)

Reply via email to