adriangb commented on code in PR #19404:
URL: https://github.com/apache/datafusion/pull/19404#discussion_r2677314268
##########
datafusion/physical-plan/src/projection.rs:
##########
@@ -1270,4 +1302,357 @@ mod tests {
);
assert!(stats.total_byte_size.is_exact().unwrap_or(false));
}
+
+ #[test]
+ fn test_filter_pushdown_with_alias() -> Result<()> {
+ let input_schema = Schema::new(vec![Field::new("a", DataType::Int32,
false)]);
+ let input = Arc::new(StatisticsExec::new(
+ Statistics::new_unknown(&input_schema),
+ input_schema.clone(),
+ ));
+
+ // project "a" as "b"
+ let projection = ProjectionExec::try_new(
+ vec![ProjectionExpr {
+ expr: Arc::new(Column::new("a", 0)),
+ alias: "b".to_string(),
+ }],
+ input,
+ )?;
+
+ // filter "b > 5"
+ let filter = Arc::new(BinaryExpr::new(
+ Arc::new(Column::new("b", 0)),
+ Operator::Gt,
+ Arc::new(Literal::new(ScalarValue::Int32(Some(5)))),
+ )) as Arc<dyn PhysicalExpr>;
+
+ let description = projection.gather_filters_for_pushdown(
+ FilterPushdownPhase::Post,
+ vec![filter],
+ &ConfigOptions::default(),
+ )?;
+
+ // Should be converted to "a > 5"
+ // "a" is index 0 in input
+ let expected_filter = Arc::new(BinaryExpr::new(
+ Arc::new(Column::new("a", 0)),
+ Operator::Gt,
+ Arc::new(Literal::new(ScalarValue::Int32(Some(5)))),
+ )) as Arc<dyn PhysicalExpr>;
+
+ assert_eq!(description.self_filters(), vec![vec![]]);
+ let pushed_filters = &description.parent_filters()[0];
+ assert_eq!(
+ format!("{}", pushed_filters[0].predicate),
+ format!("{}", expected_filter)
+ );
+
+ Ok(())
+ }
+
+ #[test]
+ fn test_filter_pushdown_with_multiple_aliases() -> Result<()> {
+ let input_schema = Schema::new(vec![
+ Field::new("a", DataType::Int32, false),
+ Field::new("b", DataType::Int32, false),
+ ]);
+ let input = Arc::new(StatisticsExec::new(
+ Statistics {
+ column_statistics: vec![Default::default();
input_schema.fields().len()],
+ ..Default::default()
+ },
+ input_schema.clone(),
+ ));
+
+ // project "a" as "x", "b" as "y"
+ let projection = ProjectionExec::try_new(
+ vec![
+ ProjectionExpr {
+ expr: Arc::new(Column::new("a", 0)),
+ alias: "x".to_string(),
+ },
+ ProjectionExpr {
+ expr: Arc::new(Column::new("b", 1)),
+ alias: "y".to_string(),
+ },
+ ],
+ input,
+ )?;
+
+ // filter "x > 5"
+ let filter1 = Arc::new(BinaryExpr::new(
+ Arc::new(Column::new("x", 0)),
+ Operator::Gt,
+ Arc::new(Literal::new(ScalarValue::Int32(Some(5)))),
+ )) as Arc<dyn PhysicalExpr>;
+
+ // filter "y < 10"
+ let filter2 = Arc::new(BinaryExpr::new(
+ Arc::new(Column::new("y", 1)),
+ Operator::Lt,
+ Arc::new(Literal::new(ScalarValue::Int32(Some(10)))),
+ )) as Arc<dyn PhysicalExpr>;
+
+ let description = projection.gather_filters_for_pushdown(
+ FilterPushdownPhase::Post,
+ vec![filter1, filter2],
+ &ConfigOptions::default(),
+ )?;
+
+ // Should be converted to "a > 5" and "b < 10"
+ let expected_filter1 = Arc::new(BinaryExpr::new(
+ Arc::new(Column::new("a", 0)),
+ Operator::Gt,
+ Arc::new(Literal::new(ScalarValue::Int32(Some(5)))),
+ )) as Arc<dyn PhysicalExpr>;
+
+ let expected_filter2 = Arc::new(BinaryExpr::new(
+ Arc::new(Column::new("b", 1)),
+ Operator::Lt,
+ Arc::new(Literal::new(ScalarValue::Int32(Some(10)))),
+ )) as Arc<dyn PhysicalExpr>;
+
+ let pushed_filters = &description.parent_filters()[0];
+ assert_eq!(pushed_filters.len(), 2);
+ // Note: The order of filters is preserved
+ assert_eq!(
+ format!("{}", pushed_filters[0].predicate),
+ format!("{}", expected_filter1)
+ );
+ assert_eq!(
+ format!("{}", pushed_filters[1].predicate),
+ format!("{}", expected_filter2)
+ );
+
+ Ok(())
+ }
+
+ #[test]
+ fn test_filter_pushdown_with_mixed_columns() -> Result<()> {
+ let input_schema = Schema::new(vec![
+ Field::new("a", DataType::Int32, false),
+ Field::new("b", DataType::Int32, false),
+ ]);
+ let input = Arc::new(StatisticsExec::new(
+ Statistics {
+ column_statistics: vec![Default::default();
input_schema.fields().len()],
+ ..Default::default()
+ },
+ input_schema.clone(),
+ ));
+
+ // project "a" as "x", "b" as "b" (pass through)
+ let projection = ProjectionExec::try_new(
+ vec![
+ ProjectionExpr {
+ expr: Arc::new(Column::new("a", 0)),
+ alias: "x".to_string(),
+ },
+ ProjectionExpr {
+ expr: Arc::new(Column::new("b", 1)),
+ alias: "b".to_string(),
+ },
+ ],
+ input,
+ )?;
+
+ // filter "x > 5"
+ let filter1 = Arc::new(BinaryExpr::new(
+ Arc::new(Column::new("x", 0)),
+ Operator::Gt,
+ Arc::new(Literal::new(ScalarValue::Int32(Some(5)))),
+ )) as Arc<dyn PhysicalExpr>;
+
+ // filter "b < 10" (using output index 1 which corresponds to 'b')
+ let filter2 = Arc::new(BinaryExpr::new(
+ Arc::new(Column::new("b", 1)),
+ Operator::Lt,
+ Arc::new(Literal::new(ScalarValue::Int32(Some(10)))),
+ )) as Arc<dyn PhysicalExpr>;
+
+ let description = projection.gather_filters_for_pushdown(
+ FilterPushdownPhase::Post,
+ vec![filter1, filter2],
+ &ConfigOptions::default(),
+ )?;
+
+ let pushed_filters = &description.parent_filters()[0];
+ assert_eq!(pushed_filters.len(), 2);
+ // "x" -> "a" (index 0)
+ let expected_filter1 = "a@0 > 5";
+ // "b" -> "b" (index 1)
+ let expected_filter2 = "b@1 < 10";
+
+ assert_eq!(format!("{}", pushed_filters[0].predicate),
expected_filter1);
+ assert_eq!(format!("{}", pushed_filters[1].predicate),
expected_filter2);
+
+ Ok(())
+ }
+
+ #[test]
+ fn test_filter_pushdown_with_complex_expression() -> Result<()> {
+ let input_schema = Schema::new(vec![Field::new("a", DataType::Int32,
false)]);
+ let input = Arc::new(StatisticsExec::new(
+ Statistics {
+ column_statistics: vec![Default::default();
input_schema.fields().len()],
+ ..Default::default()
+ },
+ input_schema.clone(),
+ ));
+
+ // project "a + 1" as "z"
+ let projection = ProjectionExec::try_new(
+ vec![ProjectionExpr {
+ expr: Arc::new(BinaryExpr::new(
+ Arc::new(Column::new("a", 0)),
+ Operator::Plus,
+ Arc::new(Literal::new(ScalarValue::Int32(Some(1)))),
+ )),
+ alias: "z".to_string(),
+ }],
+ input,
+ )?;
+
+ // filter "z > 10"
+ let filter = Arc::new(BinaryExpr::new(
+ Arc::new(Column::new("z", 0)),
+ Operator::Gt,
+ Arc::new(Literal::new(ScalarValue::Int32(Some(10)))),
+ )) as Arc<dyn PhysicalExpr>;
+
+ let description = projection.gather_filters_for_pushdown(
+ FilterPushdownPhase::Post,
+ vec![filter],
+ &ConfigOptions::default(),
+ )?;
+
+ // expand to `a + 1 > 10`
+ let pushed_filters = &description.parent_filters()[0];
+ assert!(matches!(pushed_filters[0].discriminant, PushedDown::Yes));
+ assert_eq!(format!("{}", pushed_filters[0].predicate), "a@0 + 1 > 10");
+
+ Ok(())
+ }
+
+ #[test]
+ fn test_filter_pushdown_with_unknown_column() -> Result<()> {
Review Comment:
Thanks! I'll review again, but yeah, I think if you match what I did in
https://github.com/apache/datafusion/commit/7ee2bfb353c30764f848491298f2124421f617c7
everything will work in a less confusing way (which may be what you already
did; I need to re-review).
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]