adriangb commented on code in PR #18253:
URL: https://github.com/apache/datafusion/pull/18253#discussion_r2464243726
##########
datafusion/physical-expr/src/projection.rs:
##########
@@ -100,40 +100,65 @@ impl From<ProjectionExpr> for (Arc<dyn PhysicalExpr>,
String) {
/// representing a complete projection operation and provides
/// methods to manipulate and analyze the projection as a whole.
#[derive(Debug, Clone)]
-pub struct Projection {
+pub struct ProjectionExprs {
exprs: Vec<ProjectionExpr>,
}
-impl std::fmt::Display for Projection {
+impl std::fmt::Display for ProjectionExprs {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let exprs: Vec<String> = self.exprs.iter().map(|e|
e.to_string()).collect();
write!(f, "Projection[{}]", exprs.join(", "))
}
}
-impl From<Vec<ProjectionExpr>> for Projection {
+impl From<Vec<ProjectionExpr>> for ProjectionExprs {
fn from(value: Vec<ProjectionExpr>) -> Self {
Self { exprs: value }
}
}
-impl From<&[ProjectionExpr]> for Projection {
+impl From<&[ProjectionExpr]> for ProjectionExprs {
fn from(value: &[ProjectionExpr]) -> Self {
Self {
exprs: value.to_vec(),
}
}
}
-impl AsRef<[ProjectionExpr]> for Projection {
+impl FromIterator<ProjectionExpr> for ProjectionExprs {
+ fn from_iter<T: IntoIterator<Item = ProjectionExpr>>(exprs: T) -> Self {
+ Self {
+ exprs: exprs.into_iter().collect::<Vec<_>>(),
+ }
+ }
+}
+
+impl AsRef<[ProjectionExpr]> for ProjectionExprs {
fn as_ref(&self) -> &[ProjectionExpr] {
&self.exprs
}
}
-impl Projection {
- pub fn new(exprs: Vec<ProjectionExpr>) -> Self {
- Self { exprs }
+impl ProjectionExprs {
+ pub fn new<I>(exprs: I) -> Self
+ where
+ I: IntoIterator<Item = ProjectionExpr>,
+ {
+ Self {
+ exprs: exprs.into_iter().collect::<Vec<_>>(),
+ }
+ }
+
+ pub fn from_indices(indices: &[usize], schema: &SchemaRef) -> Self {
Review Comment:
Please add documentation, including handling of duplicates, ordering, etc.
##########
datafusion/datasource/src/file_scan_config.rs:
##########
@@ -455,6 +464,10 @@ impl FileScanConfigBuilder {
file_compression_type.unwrap_or(FileCompressionType::UNCOMPRESSED);
let new_lines_in_values = new_lines_in_values.unwrap_or(false);
+ let projection = projection_indices.as_ref().map(|indices| {
+ ProjectionExprs::from_indices(indices, table_schema.table_schema())
+ });
Review Comment:
Note: this is because we are not changing `FileScanConfigBuilder`. I think
it makes sense to change it at some point, but it's not necessary yet, and we can
keep things as backwards compatible as possible for as long as possible.
##########
datafusion/physical-expr/src/projection.rs:
##########
@@ -256,6 +281,20 @@ impl Projection {
.collect_vec()
}
+ /// Extract the ordered column indices for a column-only projection.
Review Comment:
Please add more detailed documentation, e.g. what happens if the projection
contains non-column expressions, what if the column expressions are nested
within other expressions, etc.
##########
datafusion/physical-expr/src/projection.rs:
##########
@@ -100,40 +100,65 @@ impl From<ProjectionExpr> for (Arc<dyn PhysicalExpr>,
String) {
/// representing a complete projection operation and provides
/// methods to manipulate and analyze the projection as a whole.
#[derive(Debug, Clone)]
-pub struct Projection {
+pub struct ProjectionExprs {
Review Comment:
I do think this name is better and it's not a breaking change since
`Projection` was introduced after the last release. To make sure we get this
through as fast as possible (in particular before it does become a breaking
change) could you make this its own PR?
##########
datafusion/datasource/src/file_scan_config.rs:
##########
@@ -697,7 +711,7 @@ impl FileScanConfig {
fn projection_indices(&self) -> Vec<usize> {
match &self.projection {
- Some(proj) => proj.clone(),
+ Some(proj) => proj.ordered_column_indices(),
Review Comment:
Similarly, I think that long term we want to get rid of `projection_indices` and
some of these other helper functions (a lot of it drops away once we have the
projection expressions, because we can reuse the same machinery that
`ProjectionExec` uses), but for now we do what we can to keep it backwards
compatible and minimize churn.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]