This is an automated email from the ASF dual-hosted git repository.
scovich pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 73a516e3bc Move `ListLikeArray` to arrow-array to be shared with json
writer and parquet unshredding (#9437)
73a516e3bc is described below
commit 73a516e3bc9d3850f16b66d6cb65d01e6b080c97
Author: Liam Bao <[email protected]>
AuthorDate: Mon Mar 2 16:49:56 2026 -0500
Move `ListLikeArray` to arrow-array to be shared with json writer and
parquet unshredding (#9437)
# Which issue does this PR close?
- Part of #9340.
# Rationale for this change
The JSON writers for list-like types (List/ListView/FixedSizeList) are
nearly identical apart from how each type represents its element range.
The Parquet variant unshredding code already abstracts over this with the
`ListLikeArray` trait, so it makes sense to move that trait to arrow-array,
where it can be shared by both the JSON writer and the Parquet variant code.
# What changes are included in this PR?
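Move the `ListLikeArray` trait, together with its implementations for
`GenericListArray`, `GenericListViewArray`, and `FixedSizeListArray`, from
parquet-variant-compute to arrow-array.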
# Are these changes tested?
Covered by existing tests
# Are there any user-facing changes?
Yes: arrow-array gains a new public trait, `ListLikeArray`.
---
arrow-array/src/array/fixed_size_list_array.rs | 12 ++++++
arrow-array/src/array/list_array.rs | 13 ++++++
arrow-array/src/array/list_view_array.rs | 12 ++++++
arrow-array/src/array/mod.rs | 15 +++++++
parquet-variant-compute/src/arrow_to_variant.rs | 53 +------------------------
parquet-variant-compute/src/shred_variant.rs | 4 +-
parquet-variant-compute/src/unshred_variant.rs | 4 +-
7 files changed, 58 insertions(+), 55 deletions(-)
diff --git a/arrow-array/src/array/fixed_size_list_array.rs
b/arrow-array/src/array/fixed_size_list_array.rs
index ce75855c68..a3db33d61b 100644
--- a/arrow-array/src/array/fixed_size_list_array.rs
+++ b/arrow-array/src/array/fixed_size_list_array.rs
@@ -530,6 +530,18 @@ unsafe impl Array for FixedSizeListArray {
}
}
+impl super::ListLikeArray for FixedSizeListArray {
+ fn values(&self) -> &ArrayRef {
+ self.values()
+ }
+
+ fn element_range(&self, index: usize) -> std::ops::Range<usize> {
+ let value_length = self.value_length().as_usize();
+ let offset = index * value_length;
+ offset..(offset + value_length)
+ }
+}
+
impl ArrayAccessor for FixedSizeListArray {
type Item = ArrayRef;
diff --git a/arrow-array/src/array/list_array.rs
b/arrow-array/src/array/list_array.rs
index e4c603e0d9..d9613c6809 100644
--- a/arrow-array/src/array/list_array.rs
+++ b/arrow-array/src/array/list_array.rs
@@ -622,6 +622,19 @@ unsafe impl<OffsetSize: OffsetSizeTrait> Array for
GenericListArray<OffsetSize>
}
}
+impl<OffsetSize: OffsetSizeTrait> super::ListLikeArray for
GenericListArray<OffsetSize> {
+ fn values(&self) -> &ArrayRef {
+ self.values()
+ }
+
+ fn element_range(&self, index: usize) -> std::ops::Range<usize> {
+ let offsets = self.offsets();
+ let start = offsets[index].as_usize();
+ let end = offsets[index + 1].as_usize();
+ start..end
+ }
+}
+
impl<OffsetSize: OffsetSizeTrait> ArrayAccessor for
&GenericListArray<OffsetSize> {
type Item = ArrayRef;
diff --git a/arrow-array/src/array/list_view_array.rs
b/arrow-array/src/array/list_view_array.rs
index b8d427d829..eda3be11ac 100644
--- a/arrow-array/src/array/list_view_array.rs
+++ b/arrow-array/src/array/list_view_array.rs
@@ -488,6 +488,18 @@ unsafe impl<OffsetSize: OffsetSizeTrait> Array for
GenericListViewArray<OffsetSi
}
}
+impl<OffsetSize: OffsetSizeTrait> super::ListLikeArray for
GenericListViewArray<OffsetSize> {
+ fn values(&self) -> &ArrayRef {
+ self.values()
+ }
+
+ fn element_range(&self, index: usize) -> std::ops::Range<usize> {
+ let offset = self.value_offsets()[index].as_usize();
+ let size = self.value_sizes()[index].as_usize();
+ offset..(offset + size)
+ }
+}
+
impl<OffsetSize: OffsetSizeTrait> std::fmt::Debug for
GenericListViewArray<OffsetSize> {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
let prefix = OffsetSize::PREFIX;
diff --git a/arrow-array/src/array/mod.rs b/arrow-array/src/array/mod.rs
index 0d8125a2a1..ca3a02577f 100644
--- a/arrow-array/src/array/mod.rs
+++ b/arrow-array/src/array/mod.rs
@@ -667,6 +667,21 @@ impl<'a> BinaryArrayType<'a> for &'a FixedSizeBinaryArray {
}
}
+/// A trait for Arrow list-like arrays, abstracting over
+/// [`GenericListArray`], [`GenericListViewArray`], and [`FixedSizeListArray`].
+///
+/// This trait provides a uniform interface for accessing the child values and
+/// computing the element range for a given index, regardless of the underlying
+/// list layout (offsets, offsets+sizes, or fixed-size).
+pub trait ListLikeArray: Array {
+ /// Returns the child values array.
+ fn values(&self) -> &ArrayRef;
+
+ /// Returns the start and end indices into the values array for the list
+ /// element at `index`.
+ fn element_range(&self, index: usize) -> std::ops::Range<usize>;
+}
+
impl PartialEq for dyn Array + '_ {
fn eq(&self, other: &Self) -> bool {
self.to_data().eq(&other.to_data())
diff --git a/parquet-variant-compute/src/arrow_to_variant.rs
b/parquet-variant-compute/src/arrow_to_variant.rs
index be241a9a4e..03a84109ff 100644
--- a/parquet-variant-compute/src/arrow_to_variant.rs
+++ b/parquet-variant-compute/src/arrow_to_variant.rs
@@ -16,8 +16,8 @@
// under the License.
use arrow::array::{
- Array, ArrayRef, AsArray, FixedSizeListArray, GenericBinaryArray,
GenericListArray,
- GenericListViewArray, GenericStringArray, OffsetSizeTrait, PrimitiveArray,
+ Array, AsArray, FixedSizeListArray, GenericBinaryArray, GenericListArray,
GenericListViewArray,
+ GenericStringArray, ListLikeArray, OffsetSizeTrait, PrimitiveArray,
};
use arrow::compute::{CastOptions, kernels::cast};
use arrow::datatypes::{
@@ -32,7 +32,6 @@ use parquet_variant::{
VariantDecimal16, VariantDecimalType,
};
use std::collections::HashMap;
-use std::ops::Range;
// ============================================================================
// Row-oriented builders for efficient Arrow-to-Variant conversion
@@ -552,54 +551,6 @@ impl<'a, L: ListLikeArray> ListArrowToVariantBuilder<'a,
L> {
}
}
-/// Trait for list-like arrays that can provide element ranges
-pub(crate) trait ListLikeArray: Array {
- /// Get the values array
- fn values(&self) -> &ArrayRef;
-
- /// Get the start and end indices for a list element
- fn element_range(&self, index: usize) -> Range<usize>;
-}
-
-impl<O: OffsetSizeTrait> ListLikeArray for GenericListArray<O> {
- fn values(&self) -> &ArrayRef {
- self.values()
- }
-
- fn element_range(&self, index: usize) -> Range<usize> {
- let offsets = self.offsets();
- let start = offsets[index].as_usize();
- let end = offsets[index + 1].as_usize();
- start..end
- }
-}
-
-impl<O: OffsetSizeTrait> ListLikeArray for GenericListViewArray<O> {
- fn values(&self) -> &ArrayRef {
- self.values()
- }
-
- fn element_range(&self, index: usize) -> Range<usize> {
- let offsets = self.value_offsets();
- let sizes = self.value_sizes();
- let offset = offsets[index].as_usize();
- let size = sizes[index].as_usize();
- offset..(offset + size)
- }
-}
-
-impl ListLikeArray for FixedSizeListArray {
- fn values(&self) -> &ArrayRef {
- self.values()
- }
-
- fn element_range(&self, index: usize) -> Range<usize> {
- let value_length = self.value_length().as_usize();
- let offset = index * value_length;
- offset..(offset + value_length)
- }
-}
-
/// Struct builder for StructArray
pub(crate) struct StructArrowToVariantBuilder<'a> {
struct_array: &'a arrow::array::StructArray,
diff --git a/parquet-variant-compute/src/shred_variant.rs
b/parquet-variant-compute/src/shred_variant.rs
index c60c602baa..6fa3a930fc 100644
--- a/parquet-variant-compute/src/shred_variant.rs
+++ b/parquet-variant-compute/src/shred_variant.rs
@@ -652,10 +652,10 @@ impl VariantSchemaNode {
mod tests {
use super::*;
use crate::VariantArrayBuilder;
- use crate::arrow_to_variant::ListLikeArray;
use arrow::array::{
Array, BinaryViewArray, FixedSizeBinaryArray, Float64Array,
GenericListArray,
- GenericListViewArray, Int64Array, ListArray, OffsetSizeTrait,
PrimitiveArray, StringArray,
+ GenericListViewArray, Int64Array, ListArray, ListLikeArray,
OffsetSizeTrait,
+ PrimitiveArray, StringArray,
};
use arrow::datatypes::{
ArrowPrimitiveType, DataType, Field, Fields, Int64Type, TimeUnit,
UnionFields, UnionMode,
diff --git a/parquet-variant-compute/src/unshred_variant.rs
b/parquet-variant-compute/src/unshred_variant.rs
index 37363fd9d0..3600662915 100644
--- a/parquet-variant-compute/src/unshred_variant.rs
+++ b/parquet-variant-compute/src/unshred_variant.rs
@@ -17,11 +17,11 @@
//! Module for unshredding VariantArray by folding typed_value columns back
into the value column.
-use crate::arrow_to_variant::ListLikeArray;
use crate::{BorrowedShreddingState, VariantArray, VariantValueArrayBuilder};
use arrow::array::{
Array, AsArray as _, BinaryViewArray, BooleanArray, FixedSizeBinaryArray,
FixedSizeListArray,
- GenericListArray, GenericListViewArray, PrimitiveArray, StringArray,
StructArray,
+ GenericListArray, GenericListViewArray, ListLikeArray, PrimitiveArray,
StringArray,
+ StructArray,
};
use arrow::buffer::NullBuffer;
use arrow::datatypes::{