This is an automated email from the ASF dual-hosted git repository.

kontinuation pushed a commit to branch sd-format-for-all-types
in repository https://gitbox.apache.org/repos/asf/sedona-db.git

commit a9f72681e3dfdf2b09a799e73ae50c62cf3e8df4
Author: Kristin Cowalcijk <[email protected]>
AuthorDate: Fri Aug 29 18:59:07 2025 +0800

    Support list view
---
 rust/sedona-functions/src/sd_format.rs | 101 ++++++++++++++++++++++++++++++++-
 1 file changed, 98 insertions(+), 3 deletions(-)

diff --git a/rust/sedona-functions/src/sd_format.rs b/rust/sedona-functions/src/sd_format.rs
index 2ace178..2d65412 100644
--- a/rust/sedona-functions/src/sd_format.rs
+++ b/rust/sedona-functions/src/sd_format.rs
@@ -2,7 +2,8 @@ use std::{sync::Arc, vec};
 
 use crate::executor::WkbExecutor;
 use arrow_array::{
-    builder::StringBuilder, cast::AsArray, Array, GenericListArray, OffsetSizeTrait, StructArray,
+    builder::StringBuilder, cast::AsArray, Array, GenericListArray, GenericListViewArray,
+    OffsetSizeTrait, StructArray,
 };
 use arrow_schema::{DataType, Field, Fields};
 use datafusion_common::{
@@ -183,6 +184,15 @@ fn columnar_value_to_formatted_value(
                 }
                 _ => internal_err!("Unsupported list columnar value"),
             },
+            DataType::ListView(field) => match columnar_value {
+                ColumnarValue::Array(array) => {
+                    let list_array = array.as_list_view::<i32>();
+                    let formatted_list_array =
+                        list_view_value_to_formatted_value(field, list_array, maybe_width_hint)?;
+                    Ok(ColumnarValue::Array(Arc::new(formatted_list_array)))
+                }
+                _ => internal_err!("Unsupported list view columnar value"),
+            },
             _ => Ok(columnar_value.clone()),
         },
     }
@@ -288,6 +298,37 @@ fn list_value_to_formatted_value<OffsetSize: OffsetSizeTrait>(
     ))
 }
 
+fn list_view_value_to_formatted_value<OffsetSize: OffsetSizeTrait>(
+    field: &Field,
+    list_view_array: &GenericListViewArray<OffsetSize>,
+    maybe_width_hint: Option<usize>,
+) -> Result<GenericListViewArray<OffsetSize>> {
+    let values_array = list_view_array.values();
+    let offsets = list_view_array.offsets();
+    let sizes = list_view_array.sizes();
+    let nulls = list_view_array.nulls();
+
+    let new_field = field_to_formatted_field(field)?;
+    let sedona_type = SedonaType::from_data_type(field.data_type())?;
+    let unwrapped_values_array = sedona_type.unwrap_array(values_array)?;
+    let new_columnar_value = columnar_value_to_formatted_value(
+        &sedona_type,
+        &ColumnarValue::Array(unwrapped_values_array),
+        maybe_width_hint,
+    )?;
+    let ColumnarValue::Array(new_values_array) = new_columnar_value else {
+        return internal_err!("Expected Array");
+    };
+
+    Ok(GenericListViewArray::<OffsetSize>::new(
+        Arc::new(new_field),
+        offsets.clone(),
+        sizes.clone(),
+        new_values_array,
+        nulls.cloned(),
+    ))
+}
+
 struct LimitedSizeOutput<'a, T> {
     inner: &'a mut T,
     current_item_size: usize,
@@ -617,8 +658,6 @@ mod tests {
         #[values(WKB_GEOMETRY, WKB_GEOGRAPHY, WKB_VIEW_GEOMETRY, WKB_VIEW_GEOGRAPHY)]
         sedona_type: SedonaType,
     ) -> Result<()> {
-        use std::sync::Arc;
-
         let udf = sd_format_udf();
 
         // Create an array of WKB geometries using storage format
@@ -666,6 +705,62 @@ mod tests {
         Ok(())
     }
 
+    #[rstest]
+    fn sd_format_should_format_spatial_list_views(
+        #[values(WKB_GEOMETRY, WKB_GEOGRAPHY, WKB_VIEW_GEOMETRY, WKB_VIEW_GEOGRAPHY)]
+        sedona_type: SedonaType,
+    ) -> Result<()> {
+        let udf = sd_format_udf();
+
+        // Create an array of WKB geometries using storage format
+        let geom_values = vec![
+            Some("POINT(1 2)"),
+            Some("LINESTRING(0 0,1 1)"),
+            None,
+            Some("POLYGON((0 0,1 1,1 0,0 0))"),
+        ];
+        let geom_array = create_array(&geom_values, &sedona_type);
+
+        // Create a ListView containing the geometry array
+        let field = Arc::new(Field::new("geom", sedona_type.data_type(), true));
+        let offsets = ScalarBuffer::from(vec![0i32, 2i32]); // Two list views: [0,2) and [2,4)
+        let sizes = ScalarBuffer::from(vec![2i32, 2i32]); // Each list view has 2 elements
+        let list_view_array = ListViewArray::new(field, offsets, sizes, geom_array, None);
+
+        // Create tester
+        let input_sedona_type = SedonaType::Arrow(list_view_array.data_type().clone());
+        let tester = ScalarUdfTester::new(udf.clone().into(), vec![input_sedona_type]);
+
+        // Execute the UDF
+        let result = tester.invoke_array(Arc::new(list_view_array));
+        let output_array = result.unwrap();
+        let formatted_list_view = output_array
+            .as_any()
+            .downcast_ref::<ListViewArray>()
+            .unwrap();
+
+        // Check that the list view field type is now UTF8 (formatted from WKB)
+        let list_field = formatted_list_view.data_type();
+        if let DataType::ListView(inner_field) = list_field {
+            assert_eq!(inner_field.data_type(), &DataType::Utf8);
+        } else {
+            panic!("Expected ListView data type, got: {:?}", list_field);
+        }
+
+        // Check the actual formatted values in the list view
+        let values_array = formatted_list_view.values();
+        if let Some(utf8_array) = values_array.as_any().downcast_ref::<StringArray>() {
+            assert_wkt_values_match(utf8_array, &geom_values);
+        } else {
+            panic!(
+                "Expected list view elements to be formatted as UTF8 strings, got: {:?}",
+                values_array.data_type()
+            );
+        }
+
+        Ok(())
+    }
+
     #[rstest]
     fn sd_format_should_format_struct_containing_list_of_geometries(
         #[values(WKB_GEOMETRY, WKB_GEOGRAPHY, WKB_VIEW_GEOMETRY, WKB_VIEW_GEOGRAPHY)]

Reply via email to