adriangb commented on code in PR #16461: URL: https://github.com/apache/datafusion/pull/16461#discussion_r2157496405
##########
datafusion/datasource-parquet/src/opener.rs:
##########
@@ -524,6 +532,62 @@ fn should_enable_page_index(
         .unwrap_or(false)
 }
 
+use datafusion_physical_expr::expressions;
+
+/// Given a [`PhysicalExpr`] and a [`SchemaRef`], returns a new [`PhysicalExpr`] that
+/// is cast to the specified data type.
+/// Preference is always given to casting literal values to the data type of the column
+/// since casting the column to the literal value's data type can be significantly more expensive.
+/// Given two columns the cast is applied arbitrarily to the first column.
+pub fn cast_expr_to_schema(
+    expr: Arc<dyn PhysicalExpr>,
+    physical_file_schema: &Schema,
+    logical_file_schema: &Schema,
+) -> Result<Arc<dyn PhysicalExpr>> {
+    expr.transform(|expr| {
+        if let Some(column) = expr.as_any().downcast_ref::<expressions::Column>() {
+            let logical_field = logical_file_schema.field_with_name(column.name())?;
+            let Ok(physical_field) = physical_file_schema.field_with_name(column.name())
+            else {
+                if !logical_field.is_nullable() {
+                    return exec_err!(
+                        "Non-nullable column '{}' is missing from the physical schema",
+                        column.name()
+                    );

Review Comment:
   Might be useful to include some sort of file identifier here?

--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org