kosiew commented on code in PR #16696:
URL: https://github.com/apache/datafusion/pull/16696#discussion_r2389737067
##########
datafusion/optimizer/tests/optimizer_integration.rs:
##########
@@ -478,6 +514,90 @@ fn
select_correlated_predicate_subquery_with_uppercase_ident() {
);
}
+#[test]
+fn recursive_cte_projection_pushdown() -> Result<()> {
+ // Test that projection pushdown works with recursive CTEs by ensuring
+ // only the required columns are projected from the base table, even when
+ // the CTE definition includes unused columns
+ let sql = "WITH RECURSIVE nodes AS (\
+ SELECT col_int32 AS id, col_utf8 AS name, col_uint32 AS extra FROM
test \
+ UNION ALL \
+ SELECT id + 1, name, extra FROM nodes WHERE id < 3\
+ ) SELECT id FROM nodes";
+ let plan = test_sql(sql)?;
+
+ // The optimizer successfully performs projection pushdown by only
selecting the needed
+ // columns from the base table and recursive table, eliminating unused
columns
+ assert_snapshot!(
+ format!("{plan}"),
+ @r#"SubqueryAlias: nodes
+ RecursiveQuery: is_distinct=false
+ Projection: test.col_int32 AS id
+ TableScan: test projection=[col_int32]
+ Projection: CAST(CAST(nodes.id AS Int64) + Int64(1) AS Int32)
+ Filter: nodes.id < Int32(3)
+ TableScan: nodes projection=[id]
+"#
+ );
+ Ok(())
+}
+
+#[test]
+fn recursive_cte_with_unused_columns() -> Result<()> {
+ // Test projection pushdown with a recursive CTE where the base case
+ // includes columns that are never used in the recursive part or final
result
+ let sql = "WITH RECURSIVE series AS (\
+ SELECT 1 AS n, col_utf8, col_uint32, col_date32 FROM test WHERE
col_int32 = 1 \
+ UNION ALL \
+ SELECT n + 1, col_utf8, col_uint32, col_date32 FROM series WHERE n < 3\
+ ) SELECT n FROM series";
+ let plan = test_sql(sql)?;
+
+ // The optimizer successfully performs projection pushdown by eliminating
unused columns
+ // even when they're defined in the CTE but not actually needed
+ assert_snapshot!(
+ format!("{plan}"),
+ @r#"SubqueryAlias: series
+ RecursiveQuery: is_distinct=false
+ Projection: Int64(1) AS n
+ Filter: test.col_int32 = Int32(1)
+ TableScan: test projection=[col_int32]
+ Projection: series.n + Int64(1)
+ Filter: series.n < Int64(3)
+ TableScan: series projection=[n]
+"#
+ );
+ Ok(())
+}
+
+#[test]
+fn recursive_cte_true_projection_pushdown() -> Result<()> {
+ // Test case that truly demonstrates projection pushdown working:
+ // The base case only selects needed columns
Review Comment:
The two preceding tests deliberately start with extra columns in either the
base term or the recursive term and check that we trim them back to just the
referenced field: `TableScan: test projection=[col_int32]` for the base term,
and `TableScan: nodes projection=[id]` / `TableScan: series projection=[n]`
for the recursive term. The “true” case
(`recursive_cte_true_projection_pushdown`) is meant to be the baseline: when
the query already selects only the needed columns, the recursive query should
remain at that minimal shape (`TableScan: test projection=[col_int32]` and
`TableScan: countdown projection=[n]`). I’ll rename the test (e.g.
`recursive_cte_projection_pushdown_baseline`) and expand its comment so the
distinction between the pruning tests and the baseline test is clearer.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]