Lordworms commented on code in PR #10730:
URL: https://github.com/apache/datafusion/pull/10730#discussion_r1623297121


##########
datafusion/core/src/datasource/physical_plan/parquet/statistics.rs:
##########
@@ -216,6 +225,96 @@ pub(crate) fn min_statistics<'a, I: Iterator<Item = Option<&'a ParquetStatistics
     collect_scalars(data_type, scalars)
 }
 
+/// Extract the min statistics for struct array
+pub(crate) fn struct_min_statistics(
+    row_groups: &[RowGroupMetaData],
+    struct_fields: &Fields,
+) -> Result<ArrayRef, DataFusionError> {
+    let mut child_data = Vec::new();
+    let mut fields = Vec::new();
+
+    if struct_fields.iter().any(|f| f.data_type().is_nested()) {
+        return Ok(new_empty_array(&DataType::Struct(struct_fields.clone())));
+    }
+
+    for (idx, field) in struct_fields.iter().enumerate() {
+        // Handle non-nested fields
+        let max_value = row_groups
+            .iter()
+            .map(|x| x.column(idx).statistics())
+            .map(|x| {
+                x.and_then(|s| get_statistic!(s, min, min_bytes, Some(field.data_type())))
+            });
+        let array = collect_scalars(field.data_type(), max_value)?;
+        child_data.push(Arc::new(array) as Arc<dyn Array>);
+        fields.push(Arc::new(Field::new(
+            field.name(),
+            field.data_type().clone(),
+            field.is_nullable(),
+        )));
+    }
+    // Create a StructArray from collected fields and data
+    let struct_array =
+        StructArray::from(fields.into_iter().zip(child_data).collect::<Vec<_>>());
+    println!("the struct array is {:?}", struct_array);
+    Ok(Arc::new(struct_array) as ArrayRef)
+}
+
+/// Extract the max statistics for struct array
+pub(crate) fn struct_max_statistics(
+    row_groups: &[RowGroupMetaData],
+    struct_fields: &Fields,
+) -> Result<ArrayRef, DataFusionError> {
+    let mut child_data = Vec::new();
+    let mut fields = Vec::new();
+
+    if struct_fields.iter().any(|f| f.data_type().is_nested()) {
+        return Ok(new_empty_array(&DataType::Struct(struct_fields.clone())));
+    }
+
+    for (idx, field) in struct_fields.iter().enumerate() {
+        // Handle non-nested fields
+        let max_value = row_groups
+            .iter()
+            .map(|x| x.column(idx).statistics())
+            .map(|x| {
+                x.and_then(|s| get_statistic!(s, max, max_bytes, Some(field.data_type())))
+            });
+        let array = collect_scalars(field.data_type(), max_value)?;
+        child_data.push(Arc::new(array) as Arc<dyn Array>);
+        fields.push(Arc::new(Field::new(
+            field.name(),
+            field.data_type().clone(),
+            field.is_nullable(),
+        )));
+    }
+    // Create a StructArray from collected fields and data
+    let struct_array =
+        StructArray::from(fields.into_iter().zip(child_data).collect::<Vec<_>>());
+    Ok(Arc::new(struct_array) as ArrayRef)
+}
+
+/// Extract the nullcount statistics for struct array
+pub(crate) fn struct_null_count_statistics(
+    row_groups: &[RowGroupMetaData],
+    struct_fields: &Fields,
+) -> Result<ArrayRef, DataFusionError> {
+    if struct_fields.iter().any(|f| f.data_type().is_nested()) {
+        return Ok(Arc::new(new_empty_array(&DataType::UInt64)) as ArrayRef);
+    }
+
+    let mut null_count: u64 = 0;

Review Comment:
   sure, I'll fix it



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to