alamb commented on code in PR #17675:
URL: https://github.com/apache/datafusion/pull/17675#discussion_r2383495977
##########
datafusion/physical-expr/src/planner.rs:
##########
@@ -405,18 +405,17 @@ where
.collect()
}
-/// Convert a logical expression to a physical expression (without any
simplification, etc)
-pub fn logical2physical(expr: &Expr, schema: &Schema) -> Arc<dyn PhysicalExpr>
{
- // TODO this makes a deep copy of the Schema. Should take SchemaRef
instead and avoid deep copy
Review Comment:
🎉
##########
datafusion/expr/src/logical_plan/builder.rs:
##########
@@ -696,43 +702,43 @@ impl LogicalPlanBuilder {
curr_plan: LogicalPlan,
missing_cols: &IndexSet<Column>,
is_distinct: bool,
- ) -> Result<LogicalPlan> {
+ ) -> Result<Transformed<LogicalPlan>> {
match curr_plan {
LogicalPlan::Projection(Projection {
input,
mut expr,
- schema: _,
+ schema,
}) if missing_cols.iter().all(|c| input.schema().has_column(c)) =>
{
- let mut missing_exprs = missing_cols
+ let missing_exprs: Vec<Expr> = missing_cols
.iter()
.map(|c| normalize_col(Expr::Column(c.clone()), &input))
- .collect::<Result<Vec<_>>>()?;
+ // Do not let duplicate columns to be added, some of the
+ // missing_cols may be already present but without the new
+ // projected alias.
+ .filter_ok(|e| !expr.contains(e))
+ .try_collect()?;
- // Do not let duplicate columns to be added, some of the
- // missing_cols may be already present but without the new
- // projected alias.
- missing_exprs.retain(|e| !expr.contains(e));
if is_distinct {
Self::ambiguous_distinct_check(&missing_exprs,
missing_cols, &expr)?;
}
- expr.extend(missing_exprs);
- project(Arc::unwrap_or_clone(input), expr)
+
+ if missing_exprs.is_empty() {
+ Ok(Transformed::no(LogicalPlan::Projection(Projection {
+ input,
+ expr,
+ schema,
+ })))
+ } else {
+ expr.extend(missing_exprs);
+ project(input, expr).map(Transformed::yes)
+ }
}
_ => {
let is_distinct =
is_distinct || matches!(curr_plan,
LogicalPlan::Distinct(_));
- let new_inputs = curr_plan
- .inputs()
- .into_iter()
- .map(|input_plan| {
- Self::add_missing_columns(
- (*input_plan).clone(),
- missing_cols,
- is_distinct,
- )
- })
- .collect::<Result<Vec<_>>>()?;
- curr_plan.with_new_exprs(curr_plan.expressions(), new_inputs)
Review Comment:
here is one example of avoiding a clone, I think
##########
datafusion/datasource-parquet/src/row_filter.rs:
##########
@@ -548,24 +547,23 @@ mod test {
// This is the schema we would like to coerce to,
// which is different from the physical schema of the file.
- let table_schema = Schema::new(vec![Field::new(
+ let table_schema = Arc::new(Schema::new(vec![Field::new(
"timestamp_col",
DataType::Timestamp(Nanosecond, Some(Arc::from("UTC"))),
false,
- )]);
+ )]));
// Test all should fail
let expr = col("timestamp_col").lt(Expr::Literal(
ScalarValue::TimestampNanosecond(Some(1), Some(Arc::from("UTC"))),
None,
));
- let expr = logical2physical(&expr, &table_schema);
+ let expr = logical2physical(&expr, Arc::clone(&table_schema));
let schema_adapter_factory = Arc::new(DefaultSchemaAdapterFactory);
- let table_schema = Arc::new(table_schema.clone());
let candidate = FilterCandidateBuilder::new(
expr,
- file_schema.clone(),
- table_schema.clone(),
+ Arc::clone(&file_schema),
Review Comment:
I do love all these `Arc::clone` as it makes it clear we aren't cloning
schema a bunch
##########
datafusion/expr/src/logical_plan/builder.rs:
##########
@@ -1994,32 +2005,29 @@ pub fn table_scan_with_filter_and_fetch(
)
}
-pub fn table_source(table_schema: &Schema) -> Arc<dyn TableSource> {
- // TODO should we take SchemaRef and avoid cloning?
Review Comment:
🎉 yes we should!
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]