alamb commented on code in PR #14838: URL: https://github.com/apache/datafusion/pull/14838#discussion_r1968468146
########## datafusion/datasource/src/file_scan_config.rs: ########## @@ -15,19 +15,611 @@ // specific language governing permissions and limitations // under the License. -use std::{borrow::Cow, collections::HashMap, marker::PhantomData, sync::Arc}; +//! [`FileScanConfig`] to configure scanning of possibly partitioned +//! file sources. + +use std::{ + any::Any, borrow::Cow, collections::HashMap, fmt::Debug, fmt::Formatter, + fmt::Result as FmtResult, marker::PhantomData, sync::Arc, +}; use arrow::{ array::{ ArrayData, ArrayRef, BufferBuilder, DictionaryArray, RecordBatch, RecordBatchOptions, }, buffer::Buffer, - datatypes::{ArrowNativeType, DataType, SchemaRef, UInt16Type}, + datatypes::{ArrowNativeType, DataType, Field, Schema, SchemaRef, UInt16Type}, +}; +use datafusion_common::{ + exec_err, stats::Precision, ColumnStatistics, Constraints, Result, Statistics, }; -use datafusion_common::{exec_err, Result}; use datafusion_common::{DataFusionError, ScalarValue}; -use log::warn; +use datafusion_execution::{ + object_store::ObjectStoreUrl, SendableRecordBatchStream, TaskContext, +}; +use datafusion_physical_expr::{ + expressions::Column, EquivalenceProperties, LexOrdering, Partitioning, + PhysicalSortExpr, +}; +use datafusion_physical_plan::{ + display::{display_orderings, ProjectSchemaDisplay}, + metrics::ExecutionPlanMetricsSet, + projection::{all_alias_free_columns, new_projections_for_columns, ProjectionExec}, + DisplayAs, DisplayFormatType, ExecutionPlan, +}; +use log::{debug, warn}; + +use crate::{ + display::FileGroupsDisplay, + file::FileSource, + file_compression_type::FileCompressionType, + file_stream::FileStream, + source::{DataSource, DataSourceExec}, + statistics::MinMaxStatistics, + PartitionedFile, +}; + +/// The base configurations for a [`DataSourceExec`], the a physical plan for +/// any given file format. +/// +/// Use [`Self::build`] to create a [`DataSourceExec`] from a ``FileScanConfig`. 
+/// +/// # Example +/// ```ignore +/// # use std::sync::Arc; +/// # use arrow::datatypes::{Field, Fields, DataType, Schema}; +/// # use datafusion_datasource::PartitionedFile; +/// # use datafusion_datasource::file_scan_config::FileScanConfig; +/// # use datafusion_execution::object_store::ObjectStoreUrl; +/// # use datafusion::datasource::physical_plan::ArrowSource; Review Comment: This is a weird example anyways because it actually refers to `ParquetFiles`. I took the liberty of pushing a commit to this branch to restore the doc test by mocking out a ParquetSource in 35e6ab727. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org