wiedld commented on code in PR #13866:
URL: https://github.com/apache/datafusion/pull/13866#discussion_r1894526449
########## datafusion/core/src/datasource/file_format/parquet.rs: ########## @@ -2335,42 +2347,64 @@ mod tests { async fn parquet_sink_write() -> Result<()> { let parquet_sink = create_written_parquet_sink("file:///").await?; - // assert written - let mut written = parquet_sink.written(); - let written = written.drain(); - assert_eq!( - written.len(), - 1, - "expected a single parquet files to be written, instead found {}", - written.len() - ); + // assert written to proper path + let (path, file_metadata) = get_written(parquet_sink)?; + let path_parts = path.parts().collect::<Vec<_>>(); + assert_eq!(path_parts.len(), 1, "should not have path prefix"); // check the file metadata - let ( - path, - FileMetaData { - num_rows, - schema, - key_value_metadata, - .. + let expected_kv_meta = vec![ + KeyValue { + key: "my-data".to_string(), + value: Some("stuff".to_string()), }, - ) = written.take(1).next().unwrap(); + KeyValue { + key: "my-data-bool-key".to_string(), + value: None, + }, + ]; + assert_file_metadata(file_metadata, expected_kv_meta); + + Ok(()) + } + + #[tokio::test] + async fn parquet_sink_parallel_write() -> Result<()> { Review Comment: New test added. ########## datafusion/core/src/datasource/file_format/parquet.rs: ########## @@ -2335,42 +2347,64 @@ mod tests { async fn parquet_sink_write() -> Result<()> { let parquet_sink = create_written_parquet_sink("file:///").await?; - // assert written - let mut written = parquet_sink.written(); - let written = written.drain(); - assert_eq!( - written.len(), - 1, - "expected a single parquet files to be written, instead found {}", - written.len() - ); + // assert written to proper path + let (path, file_metadata) = get_written(parquet_sink)?; + let path_parts = path.parts().collect::<Vec<_>>(); + assert_eq!(path_parts.len(), 1, "should not have path prefix"); // check the file metadata - let ( - path, - FileMetaData { - num_rows, - schema, - key_value_metadata, - .. 
+ let expected_kv_meta = vec![ + KeyValue { + key: "my-data".to_string(), + value: Some("stuff".to_string()), }, - ) = written.take(1).next().unwrap(); + KeyValue { + key: "my-data-bool-key".to_string(), + value: None, + }, + ]; + assert_file_metadata(file_metadata, expected_kv_meta); + + Ok(()) + } + + #[tokio::test] + async fn parquet_sink_parallel_write() -> Result<()> { + let opts = ParquetOptions { + allow_single_file_parallelism: true, + maximum_parallel_row_group_writers: 2, + maximum_buffered_record_batches_per_stream: 2, + ..Default::default() + }; + + let parquet_sink = + create_written_parquet_sink_using_config("file:///", opts).await?; + + // assert written to proper path + let (path, file_metadata) = get_written(parquet_sink)?; let path_parts = path.parts().collect::<Vec<_>>(); assert_eq!(path_parts.len(), 1, "should not have path prefix"); - assert_eq!(num_rows, 2, "file metadata to have 2 rows"); - assert!( - schema.iter().any(|col_schema| col_schema.name == "a"), - "output file metadata should contain col a" - ); - assert!( - schema.iter().any(|col_schema| col_schema.name == "b"), - "output file metadata should contain col b" - ); + // check the file metadata + let expected_kv_meta = vec![ + KeyValue { + key: "my-data".to_string(), + value: Some("stuff".to_string()), + }, + KeyValue { + key: "my-data-bool-key".to_string(), + value: None, + }, + ]; + assert_file_metadata(file_metadata, expected_kv_meta); - let mut key_value_metadata = key_value_metadata.unwrap(); - key_value_metadata.sort_by(|a, b| a.key.cmp(&b.key)); - let expected_metadata = vec![ + Ok(()) + } + + #[tokio::test] + async fn parquet_sink_write_insert_schema_into_metadata() -> Result<()> { Review Comment: New test added. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For queries about this service, please contact Infrastructure at: us...@infra.apache.org

---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org