This is an automated email from the ASF dual-hosted git repository.
dheres pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 19889a33f6 Use chunks_exact for has_true/has_false to enable compiler unrolling (#9570)
19889a33f6 is described below
commit 19889a33f63427c4b22ab3b7fcb62b77dbe9ddec
Author: Adrian Garcia Badaracco <[email protected]>
AuthorDate: Wed Mar 18 02:58:01 2026 -0500
Use chunks_exact for has_true/has_false to enable compiler unrolling (#9570)
## Summary
- Replace `.chunks(64)` with `.chunks_exact(16)` in `has_true()` and
`has_false()` as suggested in
https://github.com/apache/arrow-rs/pull/9511#discussion_r2950942579
- With `chunks_exact`, the compiler can fully unroll the inner fold
(guaranteed size, no inner branch/loop), allowing a smaller block size
for more frequent short-circuit exits without regressing the full-scan
path
## Benchmark results (block size 16 vs baseline)
- Full-scan worst case (65536): No regression (~49ns both)
- Early-exit cases (65536): ~27% faster (6.0ns → 4.4ns)
- Small arrays (64, 1024): Unchanged
## Test plan
- [x] All 13 existing `test_has` tests pass
run benchmarks boolean_array
@DanDanDan Would appreciate your review!
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-authored-by: Claude Opus 4.6 (1M context) <[email protected]>
---
arrow-array/src/array/boolean_array.rs | 29 ++++++++++++++++-------------
1 file changed, 16 insertions(+), 13 deletions(-)
diff --git a/arrow-array/src/array/boolean_array.rs b/arrow-array/src/array/boolean_array.rs
index 1a2dd986ad..ee3413e183 100644
--- a/arrow-array/src/array/boolean_array.rs
+++ b/arrow-array/src/array/boolean_array.rs
@@ -158,9 +158,9 @@ impl BooleanArray {
}
/// Block size for chunked fold operations in [`Self::has_true`] and [`Self::has_false`].
- /// Folding this many u64 chunks at a time allows the compiler to autovectorize
- /// the inner loop while still enabling short-circuit exits.
- const CHUNK_FOLD_BLOCK_SIZE: usize = 64;
+ /// Using `chunks_exact` with this size lets the compiler fully unroll the inner
+ /// fold (no inner branch/loop), enabling short-circuit exits every N chunks.
+ const CHUNK_FOLD_BLOCK_SIZE: usize = 16;
/// Returns an [`UnalignedBitChunk`] over this array's values.
fn unaligned_bit_chunks(&self) -> UnalignedBitChunk<'_> {
@@ -204,11 +204,12 @@ impl BooleanArray {
}
None => {
let bit_chunks = self.unaligned_bit_chunks();
- bit_chunks.prefix().unwrap_or(0) != 0
- || bit_chunks
- .chunks()
- .chunks(Self::CHUNK_FOLD_BLOCK_SIZE)
- .any(|block| block.iter().fold(0u64, |acc, &c| acc | c) != 0)
+ let chunks = bit_chunks.chunks();
+ let mut exact = chunks.chunks_exact(Self::CHUNK_FOLD_BLOCK_SIZE);
+ let found = bit_chunks.prefix().unwrap_or(0) != 0
+ || exact.any(|block| block.iter().fold(0u64, |acc, &c| acc | c) != 0);
+ found
+ || exact.remainder().iter().any(|&c| c != 0)
|| bit_chunks.suffix().unwrap_or(0) != 0
}
}
@@ -243,13 +244,15 @@ impl BooleanArray {
(None, Some(_)) => (0, !trail_mask),
(None, None) => (0, 0),
};
- bit_chunks
+ let chunks = bit_chunks.chunks();
+ let mut exact = chunks.chunks_exact(Self::CHUNK_FOLD_BLOCK_SIZE);
+ let found = bit_chunks
.prefix()
.is_some_and(|v| (v | prefix_fill) != u64::MAX)
- || bit_chunks
- .chunks()
- .chunks(Self::CHUNK_FOLD_BLOCK_SIZE)
- .any(|block| block.iter().fold(u64::MAX, |acc, &c| acc & c) != u64::MAX)
+ || exact
+ .any(|block| block.iter().fold(u64::MAX, |acc, &c| acc & c) != u64::MAX);
+ found
+ || exact.remainder().iter().any(|&c| c != u64::MAX)
|| bit_chunks
.suffix()
.is_some_and(|v| (v | suffix_fill) != u64::MAX)