alamb commented on code in PR #14685:
URL: https://github.com/apache/datafusion/pull/14685#discussion_r1975526113


##########
datafusion/datasource/src/file_scan_config.rs:
##########
@@ -433,54 +494,13 @@ impl FileScanConfig {
             );
         }
 
-        let proj_indices = if let Some(proj) = &self.projection {
-            proj
-        } else {
-            let len = self.file_schema.fields().len() + 
self.table_partition_cols.len();
-            &(0..len).collect::<Vec<_>>()
-        };
-
-        let mut table_fields = vec![];
-        let mut table_cols_stats = vec![];
-        for idx in proj_indices {
-            if *idx < self.file_schema.fields().len() {
-                let field = self.file_schema.field(*idx);
-                table_fields.push(field.clone());
-                
table_cols_stats.push(self.statistics.column_statistics[*idx].clone())
-            } else {
-                let partition_idx = idx - self.file_schema.fields().len();
-                
table_fields.push(self.table_partition_cols[partition_idx].to_owned());
-                // TODO provide accurate stat for partition column (#1186)
-                table_cols_stats.push(ColumnStatistics::new_unknown())
-            }
-        }
-
-        let table_stats = Statistics {
-            num_rows: self.statistics.num_rows,
-            // TODO correct byte size?
-            total_byte_size: Precision::Absent,
-            column_statistics: table_cols_stats,
-        };
-
-        let projected_schema = Arc::new(Schema::new_with_metadata(
-            table_fields,
-            self.file_schema.metadata().clone(),
-        ));
-
-        let projected_constraints = self
-            .constraints
-            .project(proj_indices)
-            .unwrap_or_else(Constraints::empty);
+        let schema = self.projected_schema();

Review Comment:
   this looks really nice now



##########
datafusion/proto/tests/cases/roundtrip_physical_plan.rs:
##########
@@ -1626,7 +1659,6 @@ async fn roundtrip_parquet_select_star_predicate() -> 
Result<()> {
     roundtrip_test_sql_with_context(sql, &ctx).await
 }
 
-#[ignore = "Test failing due to 
https://github.com/apache/datafusion/issues/14679";]

Review Comment:
   🎉 



##########
datafusion/core/src/datasource/file_format/parquet.rs:
##########
@@ -1934,7 +1934,8 @@ mod tests {
 
         // test metadata
         assert_eq!(exec.statistics()?.num_rows, Precision::Exact(8));
-        assert_eq!(exec.statistics()?.total_byte_size, Precision::Exact(671));
+        // assert_eq!(exec.statistics()?.total_byte_size, 
Precision::Exact(671));
+        // todo: uncomment when FileScanConfig::projection_stats puts byte size

Review Comment:
   I filed this ticket to track:
   - https://github.com/apache/datafusion/issues/14936



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to