efredine commented on code in PR #11289:
URL: https://github.com/apache/datafusion/pull/11289#discussion_r1676560183
##########
datafusion/core/tests/parquet/arrow_statistics.rs:
##########
@@ -1984,7 +1981,96 @@ async fn test_struct() {
}
.run();
}
+// test nested struct
+#[tokio::test]
+async fn test_nested_struct() {
+ // This creates a parquet file with 1 column named "nested_struct"
+ // The file is created by 1 record batch with 3 rows in the nested struct array
+ let reader = TestReader {
+ scenario: Scenario::StructArrayNested,
+ row_per_group: 5,
+ }
+ .build()
+ .await;
+ // Expected minimum and maximum values for nested struct fields
+ let inner_min = StructArray::from(vec![
+ (
+ Arc::new(Field::new("b", DataType::Boolean, false)),
+ Arc::new(BooleanArray::from(vec![Some(false)])) as ArrayRef,
+ ),
+ (
+ Arc::new(Field::new("c", DataType::Int32, false)),
+ Arc::new(Int32Array::from(vec![Some(42)])) as ArrayRef,
+ ),
+ ]);
+ let inner_max = StructArray::from(vec![
+ (
+ Arc::new(Field::new("b", DataType::Boolean, false)),
+ Arc::new(BooleanArray::from(vec![Some(true)])) as ArrayRef,
+ ),
+ (
+ Arc::new(Field::new("c", DataType::Int32, false)),
+ Arc::new(Int32Array::from(vec![Some(44)])) as ArrayRef,
+ ),
+ ]);
+
+ let inner_fields = Fields::from(vec![
+ Field::new("b", DataType::Boolean, false),
+ Field::new("c", DataType::Int32, false),
+ ]);
+
+ // Expected minimum outer struct
+ let expected_min_outer = StructArray::from(vec![
+ (
+ Arc::new(Field::new(
+ "inner_struct",
+ DataType::Struct(inner_fields.clone()),
+ false,
+ )),
+ Arc::new(inner_min) as ArrayRef,
+ ),
+ (
+ Arc::new(Field::new("outer_float", DataType::Float64, false)),
+ Arc::new(Float64Array::from(vec![Some(5.0)])) as ArrayRef,
+ ),
+ (
+ Arc::new(Field::new("outer_boolean", DataType::Boolean, false)),
+ Arc::new(BooleanArray::from(vec![Some(false)])) as ArrayRef,
+ ),
+ ]);
+
+ // Expected maximum outer struct
+ let expected_max_outer = StructArray::from(vec![
+ (
+ Arc::new(Field::new(
+ "inner_struct",
+ DataType::Struct(inner_fields),
+ false,
+ )),
+ Arc::new(inner_max) as ArrayRef,
+ ),
+ (
+ Arc::new(Field::new("outer_float", DataType::Float64, false)),
+ Arc::new(Float64Array::from(vec![Some(7.0)])) as ArrayRef,
+ ),
+ (
+ Arc::new(Field::new("outer_boolean", DataType::Boolean, false)),
+ Arc::new(BooleanArray::from(vec![Some(true)])) as ArrayRef,
+ ),
+ ]);
+
Review Comment:
Here are some tests in the arrow crate that write a two-level struct:
https://github.com/apache/arrow-rs/blob/master/parquet/src/arrow/arrow_writer/mod.rs#L1516-L1618
I'm not sure whether they're helpful.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]