This is an automated email from the ASF dual-hosted git repository.

scovich pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 55ff6eb788 add `shred_variant` support for `LargeUtf8` and `LargeBinary` (#9554)
55ff6eb788 is described below

commit 55ff6eb7885f757f2d8637400f223eb84bb6a500
Author: Konstantin Tarasov <[email protected]>
AuthorDate: Mon Mar 16 10:23:01 2026 -0400

    add `shred_variant` support for `LargeUtf8` and `LargeBinary` (#9554)
    
    # Which issue does this PR close?
    
    - Closes #9525.
    
    # Rationale for this change
    
    See the linked issue (#9525) for the rationale.
    
    # What changes are included in this PR?
    
    Add `shred_variant` support for `LargeUtf8` and `LargeBinary`
    
    # Are these changes tested?
    
    Yes, unit tests.
    
    # Are there any user-facing changes?
    
    No
---
 parquet-variant-compute/src/shred_variant.rs | 122 ++++++++++++++++++++++++++-
 1 file changed, 118 insertions(+), 4 deletions(-)

diff --git a/parquet-variant-compute/src/shred_variant.rs 
b/parquet-variant-compute/src/shred_variant.rs
index 6fa3a930fc..d0087fd2c7 100644
--- a/parquet-variant-compute/src/shred_variant.rs
+++ b/parquet-variant-compute/src/shred_variant.rs
@@ -147,8 +147,10 @@ pub(crate) fn 
make_variant_to_shredded_variant_arrow_row_builder<'a>(
         | DataType::Timestamp(TimeUnit::Microsecond | TimeUnit::Nanosecond, _)
         | DataType::Binary
         | DataType::BinaryView
+        | DataType::LargeBinary
         | DataType::Utf8
         | DataType::Utf8View
+        | DataType::LargeUtf8
         | DataType::FixedSizeBinary(16) // UUID
         => {
             let builder =
@@ -654,8 +656,8 @@ mod tests {
     use crate::VariantArrayBuilder;
     use arrow::array::{
         Array, BinaryViewArray, FixedSizeBinaryArray, Float64Array, 
GenericListArray,
-        GenericListViewArray, Int64Array, ListArray, ListLikeArray, 
OffsetSizeTrait,
-        PrimitiveArray, StringArray,
+        GenericListViewArray, Int64Array, LargeBinaryArray, LargeStringArray, 
ListArray,
+        ListLikeArray, OffsetSizeTrait, PrimitiveArray, StringArray,
     };
     use arrow::datatypes::{
         ArrowPrimitiveType, DataType, Field, Fields, Int64Type, TimeUnit, 
UnionFields, UnionMode,
@@ -1144,6 +1146,120 @@ mod tests {
         assert!(typed_value_float64.is_null(2)); // string doesn't convert
     }
 
+    #[test]
+    // TODO(#9518): Drop this once variant_get tests build shredded fixtures via shred_variant.
+    fn test_largeutf8_shredding() {
+        let input = VariantArray::from_iter(vec![
+            Some(Variant::from("hello")),
+            Some(Variant::from(42i64)),
+            None,
+            Some(Variant::Null),
+            Some(Variant::from("world")),
+        ]);
+
+        let result = shred_variant(&input, &DataType::LargeUtf8).unwrap();
+        let metadata = result.metadata_field();
+        let value = result.value_field().unwrap();
+        let typed_value = result
+            .typed_value_field()
+            .unwrap()
+            .as_any()
+            .downcast_ref::<LargeStringArray>()
+            .unwrap();
+
+        assert_eq!(result.len(), 5);
+
+        // Row 0: string shreds to typed_value
+        assert!(result.is_valid(0));
+        assert!(value.is_null(0));
+        assert_eq!(typed_value.value(0), "hello");
+
+        // Row 1: integer falls back to value
+        assert!(result.is_valid(1));
+        assert!(value.is_valid(1));
+        assert!(typed_value.is_null(1));
+        assert_eq!(
+            Variant::new(metadata.value(1), value.value(1)),
+            Variant::from(42i64)
+        );
+
+        // Row 2: top-level null
+        assert!(result.is_null(2));
+        assert!(value.is_null(2));
+        assert!(typed_value.is_null(2));
+
+        // Row 3: variant null falls back to value
+        assert!(result.is_valid(3));
+        assert!(value.is_valid(3));
+        assert!(typed_value.is_null(3));
+        assert_eq!(
+            Variant::new(metadata.value(3), value.value(3)),
+            Variant::Null
+        );
+
+        // Row 4: string shreds to typed_value
+        assert!(result.is_valid(4));
+        assert!(value.is_null(4));
+        assert_eq!(typed_value.value(4), "world");
+    }
+
+    #[test]
+    // TODO(#9518): Drop this once variant_get tests build shredded fixtures via shred_variant.
+    fn test_largebinary_shredding() {
+        let input = VariantArray::from_iter(vec![
+            Some(Variant::from(&b"\x00\x01\x02"[..])),
+            Some(Variant::from("not_binary")),
+            None,
+            Some(Variant::Null),
+            Some(Variant::from(&b"\xff\xaa"[..])),
+        ]);
+
+        let result = shred_variant(&input, &DataType::LargeBinary).unwrap();
+        let metadata = result.metadata_field();
+        let value = result.value_field().unwrap();
+        let typed_value = result
+            .typed_value_field()
+            .unwrap()
+            .as_any()
+            .downcast_ref::<LargeBinaryArray>()
+            .unwrap();
+
+        assert_eq!(result.len(), 5);
+
+        // Row 0: binary shreds to typed_value
+        assert!(result.is_valid(0));
+        assert!(value.is_null(0));
+        assert_eq!(typed_value.value(0), &[0x00, 0x01, 0x02]);
+
+        // Row 1: string falls back to value
+        assert!(result.is_valid(1));
+        assert!(value.is_valid(1));
+        assert!(typed_value.is_null(1));
+        assert_eq!(
+            Variant::new(metadata.value(1), value.value(1)),
+            Variant::from("not_binary")
+        );
+
+        // Row 2: top-level null
+        assert!(result.is_null(2));
+        assert!(value.is_null(2));
+        assert!(typed_value.is_null(2));
+
+        // Row 3: variant null falls back to value
+        assert!(result.is_valid(3));
+        assert!(value.is_valid(3));
+        assert!(typed_value.is_null(3));
+        assert_eq!(
+            Variant::new(metadata.value(3), value.value(3)),
+            Variant::Null
+        );
+
+        // Row 4: binary shreds to typed_value
+        assert!(result.is_valid(4));
+        assert!(value.is_null(4));
+        assert_eq!(typed_value.value(4), &[0xff, 0xaa]);
+    }
+
     #[test]
     fn test_invalid_shredded_types_rejected() {
         let input = VariantArray::from_iter([Variant::from(42)]);
@@ -1156,8 +1272,6 @@ mod tests {
             DataType::Time32(TimeUnit::Second),
             DataType::Time64(TimeUnit::Nanosecond),
             DataType::Timestamp(TimeUnit::Millisecond, None),
-            DataType::LargeBinary,
-            DataType::LargeUtf8,
             DataType::FixedSizeBinary(17),
             DataType::Union(
                 UnionFields::from_fields(vec![

Reply via email to