alamb commented on code in PR #16785: URL: https://github.com/apache/datafusion/pull/16785#discussion_r2208580080
########## datafusion/core/tests/dataframe/mod.rs: ########## @@ -6193,3 +6194,59 @@ async fn test_copy_schema() -> Result<()> { assert_logical_expr_schema_eq_physical_expr_schema(result).await?; Ok(()) } + +#[tokio::test] +async fn test_copy_to_preserves_order() -> Result<()> { Review Comment: I verified that this test fails without the code change in the PR: ``` ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ Snapshot Summary ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ Snapshot: copy_to_preserves_order Source: datafusion/core/tests/dataframe/mod.rs:6241 ──────────────────────────────────────────────────────────────────────────────── Expression: physical_plan_format ──────────────────────────────────────────────────────────────────────────────── -old snapshot +new results ────────────┬─────────────────────────────────────────────────────────────────── 0 0 │ UnionExec 1 1 │ DataSinkExec: sink=CsvSink(file_groups=[]) 2 │- SortExec: expr=[column1@0 DESC], preserve_partitioning=[false] 3 │- DataSourceExec: partitions=1, partition_sizes=[1] 2 │+ DataSourceExec: partitions=1, partition_sizes=[1] 4 3 │ DataSourceExec: partitions=1, partition_sizes=[1] ────────────┴─────────────────────────────────────────────────────────────────── ``` ########## datafusion/core/tests/dataframe/mod.rs: ########## @@ -6193,3 +6194,59 @@ async fn test_copy_schema() -> Result<()> { assert_logical_expr_schema_eq_physical_expr_schema(result).await?; Ok(()) } + +#[tokio::test] +async fn test_copy_to_preserves_order() -> Result<()> { + let tmp_dir = TempDir::new()?; + + let session_state = SessionStateBuilder::new_with_default_features().build(); + let session_ctx = SessionContext::new_with_state(session_state); + + let target_path = tmp_dir.path().join("target_ordered.csv"); + let csv_file_format = session_ctx + .state() + .get_file_format_factory("csv") + .map(format_as_file_type) + .unwrap(); + + let ordered_select_plan = LogicalPlanBuilder::values(vec![ + vec![lit(1u64)], + vec![lit(10u64)], + vec![lit(20u64)], + vec![lit(100u64)], + ])? + .sort(vec![SortExpr::new(col("column1"), false, true)])? + .build()?; + + let copy_to_plan = LogicalPlanBuilder::copy_to( + ordered_select_plan, + target_path.to_str().unwrap().to_string(), + csv_file_format, + HashMap::new(), + vec![], + )? + .build()?; + + let union_side_branch = LogicalPlanBuilder::values(vec![vec![lit(1u64)]])?.build()?; + let union_plan = LogicalPlanBuilder::from(copy_to_plan) + .union(union_side_branch)? + .build()?; + + let frame = session_ctx.execute_logical_plan(union_plan).await?; + let physical_plan = frame.create_physical_plan().await?; + + let physical_plan_format = + displayable(physical_plan.as_ref()).indent(true).to_string(); + + assert_snapshot!( Review Comment: i think it would also be worth a comment here explaining what is expected in the test. Something like ```suggestion // Expect that input to the DataSinkExec is sorted correctly assert_snapshot!( ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org