Blizzara commented on code in PR #13750: URL: https://github.com/apache/datafusion/pull/13750#discussion_r1884069224
########## datafusion/core/src/datasource/file_format/csv.rs: ########## @@ -1259,73 +1259,57 @@ mod tests { Ok(()) } - /// Read a single empty csv file in parallel + /// Read a single empty csv file /// /// empty_0_byte.csv: /// (file is empty) - #[rstest(n_partitions, case(1), case(2), case(3), case(4))] #[tokio::test] - async fn test_csv_parallel_empty_file(n_partitions: usize) -> Result<()> { - let config = SessionConfig::new() - .with_repartition_file_scans(true) - .with_repartition_file_min_size(0) - .with_target_partitions(n_partitions); - let ctx = SessionContext::new_with_config(config); + async fn test_csv_empty_file() -> Result<()> { + let ctx = SessionContext::new(); ctx.register_csv( "empty", "tests/data/empty_0_byte.csv", CsvReadOptions::new().has_header(false), ) .await?; - // Require a predicate to enable repartition for the optimizer let query = "select * from empty where random() > 0.5;"; let query_result = ctx.sql(query).await?.collect().await?; - let actual_partitions = count_query_csv_partitions(&ctx, query).await?; #[rustfmt::skip] let expected = ["++", "++"]; assert_batches_eq!(expected, &query_result); - assert_eq!(1, actual_partitions); // Won't get partitioned if all files are empty Ok(()) } - /// Read a single empty csv file with header in parallel + /// Read a single empty csv file with header /// /// empty.csv: /// c1,c2,c3 - #[rstest(n_partitions, case(1), case(2), case(3))] #[tokio::test] - async fn test_csv_parallel_empty_with_header(n_partitions: usize) -> Result<()> { - let config = SessionConfig::new() - .with_repartition_file_scans(true) - .with_repartition_file_min_size(0) - .with_target_partitions(n_partitions); - let ctx = SessionContext::new_with_config(config); + async fn test_csv_empty_with_header() -> Result<()> { + let ctx = SessionContext::new(); ctx.register_csv( "empty", "tests/data/empty.csv", CsvReadOptions::new().has_header(true), ) .await?; - // Require a predicate to enable repartition for the optimizer let query 
= "select * from empty where random() > 0.5;"; let query_result = ctx.sql(query).await?.collect().await?; - let actual_partitions = count_query_csv_partitions(&ctx, query).await?; Review Comment: this checks that the plan has a `CsvExec` node, which we no longer have (now it's a `TableScan: empty` / `EmptyExec`) ########## datafusion/core/src/datasource/file_format/csv.rs: ########## @@ -1259,73 +1259,57 @@ mod tests { Ok(()) } - /// Read a single empty csv file in parallel + /// Read a single empty csv file /// /// empty_0_byte.csv: /// (file is empty) - #[rstest(n_partitions, case(1), case(2), case(3), case(4))] #[tokio::test] - async fn test_csv_parallel_empty_file(n_partitions: usize) -> Result<()> { - let config = SessionConfig::new() - .with_repartition_file_scans(true) - .with_repartition_file_min_size(0) - .with_target_partitions(n_partitions); - let ctx = SessionContext::new_with_config(config); + async fn test_csv_empty_file() -> Result<()> { + let ctx = SessionContext::new(); ctx.register_csv( "empty", "tests/data/empty_0_byte.csv", CsvReadOptions::new().has_header(false), ) .await?; - // Require a predicate to enable repartition for the optimizer let query = "select * from empty where random() > 0.5;"; let query_result = ctx.sql(query).await?.collect().await?; - let actual_partitions = count_query_csv_partitions(&ctx, query).await?; #[rustfmt::skip] let expected = ["++", "++"]; assert_batches_eq!(expected, &query_result); - assert_eq!(1, actual_partitions); // Won't get partitioned if all files are empty Ok(()) } - /// Read a single empty csv file with header in parallel + /// Read a single empty csv file with header /// /// empty.csv: /// c1,c2,c3 - #[rstest(n_partitions, case(1), case(2), case(3))] #[tokio::test] - async fn test_csv_parallel_empty_with_header(n_partitions: usize) -> Result<()> { - let config = SessionConfig::new() - .with_repartition_file_scans(true) - .with_repartition_file_min_size(0) - .with_target_partitions(n_partitions); - let 
ctx = SessionContext::new_with_config(config); + async fn test_csv_empty_with_header() -> Result<()> { + let ctx = SessionContext::new(); ctx.register_csv( "empty", "tests/data/empty.csv", CsvReadOptions::new().has_header(true), ) .await?; - // Require a predicate to enable repartition for the optimizer let query = "select * from empty where random() > 0.5;"; let query_result = ctx.sql(query).await?.collect().await?; - let actual_partitions = count_query_csv_partitions(&ctx, query).await?; Review Comment: this checks that the plan has a `CsvExec` node, which we no longer have (now it's a `TableScan: empty` / `EmptyExec`) -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org