alamb commented on code in PR #14685: URL: https://github.com/apache/datafusion/pull/14685#discussion_r1975526113
########## datafusion/datasource/src/file_scan_config.rs: ########## @@ -433,54 +494,13 @@ impl FileScanConfig { ); } - let proj_indices = if let Some(proj) = &self.projection { - proj - } else { - let len = self.file_schema.fields().len() + self.table_partition_cols.len(); - &(0..len).collect::<Vec<_>>() - }; - - let mut table_fields = vec![]; - let mut table_cols_stats = vec![]; - for idx in proj_indices { - if *idx < self.file_schema.fields().len() { - let field = self.file_schema.field(*idx); - table_fields.push(field.clone()); - table_cols_stats.push(self.statistics.column_statistics[*idx].clone()) - } else { - let partition_idx = idx - self.file_schema.fields().len(); - table_fields.push(self.table_partition_cols[partition_idx].to_owned()); - // TODO provide accurate stat for partition column (#1186) - table_cols_stats.push(ColumnStatistics::new_unknown()) - } - } - - let table_stats = Statistics { - num_rows: self.statistics.num_rows, - // TODO correct byte size? - total_byte_size: Precision::Absent, - column_statistics: table_cols_stats, - }; - - let projected_schema = Arc::new(Schema::new_with_metadata( - table_fields, - self.file_schema.metadata().clone(), - )); - - let projected_constraints = self - .constraints - .project(proj_indices) - .unwrap_or_else(Constraints::empty); + let schema = self.projected_schema(); Review Comment: this looks really nice now ########## datafusion/proto/tests/cases/roundtrip_physical_plan.rs: ########## @@ -1626,7 +1659,6 @@ async fn roundtrip_parquet_select_star_predicate() -> Result<()> { roundtrip_test_sql_with_context(sql, &ctx).await } -#[ignore = "Test failing due to https://github.com/apache/datafusion/issues/14679"] Review Comment: 🎉 ########## datafusion/core/src/datasource/file_format/parquet.rs: ########## @@ -1934,7 +1934,8 @@ mod tests { // test metadata assert_eq!(exec.statistics()?.num_rows, Precision::Exact(8)); - assert_eq!(exec.statistics()?.total_byte_size, Precision::Exact(671)); + // assert_eq!(exec.statistics()?.total_byte_size, Precision::Exact(671)); + // todo: uncomment when FileScanConfig::projection_stats puts byte size Review Comment: I filed this ticket to track: - https://github.com/apache/datafusion/issues/14936 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org