efredine commented on code in PR #11169:
URL: https://github.com/apache/datafusion/pull/11169#discussion_r1659431592
##########
datafusion/core/src/datasource/file_format/parquet.rs:
##########
@@ -1439,6 +1441,57 @@ mod tests {
Ok(())
}
+ #[tokio::test]
+ async fn test_statistics_from_parquet_metadata_dictionary() -> Result<()> {
+ // Dictionary values for column c_dic: ["a", "b", "c", "d"]; with keys [0, 0, 1, 2] the logical column is ["a", "a", "b", "c"]
+ let values = StringArray::from_iter_values(["a", "b", "c", "d"]);
+ let keys = Int32Array::from_iter_values([0, 0, 1, 2]);
+ let dic_array =
+ DictionaryArray::<Int32Type>::try_new(keys, Arc::new(values)).unwrap();
+ let boxed_array: Box<dyn arrow_array::Array> = Box::new(dic_array);
+ let c_dic: ArrayRef = Arc::from(boxed_array);
+
+ // Define the schema
+ let field = Field::new(
+ "c_dic",
+ DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
+ false,
+ );
+ let schema = Schema::new(vec![field]);
+ // Create the RecordBatch
+ let batch1 = RecordBatch::try_new(Arc::new(schema), vec![c_dic]).unwrap();
Review Comment:
Thanks for this - I think it will also turn out to be useful in simplifying
some other tests I'm writing.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]