alamb commented on code in PR #16305: URL: https://github.com/apache/datafusion/pull/16305#discussion_r2135649433
########## datafusion/core/src/datasource/listing/table.rs: ########## @@ -17,44 +17,53 @@ //! The table implementation. -use std::collections::HashMap; -use std::{any::Any, str::FromStr, sync::Arc}; - -use super::helpers::{expr_applicable_for_cols, pruned_partition_list}; -use super::{ListingTableUrl, PartitionedFile}; -use crate::datasource::{ - create_ordering, - file_format::{file_compression_type::FileCompressionType, FileFormat}, - physical_plan::FileSinkConfig, +use super::{ + helpers::{expr_applicable_for_cols, pruned_partition_list}, + ListingTableUrl, PartitionedFile, +}; +use crate::{ + datasource::file_format::{file_compression_type::FileCompressionType, FileFormat}, + datasource::{create_ordering, physical_plan::FileSinkConfig}, + execution::context::SessionState, }; -use crate::execution::context::SessionState; - use arrow::datatypes::{DataType, Field, SchemaBuilder, SchemaRef}; use arrow_schema::Schema; +use async_trait::async_trait; use datafusion_catalog::{Session, TableProvider}; -use datafusion_common::stats::Precision; use datafusion_common::{ config_datafusion_err, config_err, internal_err, plan_err, project_schema, - Constraints, DataFusionError, Result, SchemaExt, + stats::Precision, Constraints, DataFusionError, Result, SchemaExt, }; -use datafusion_datasource::compute_all_files_statistics; -use datafusion_datasource::file_groups::FileGroup; -use datafusion_datasource::file_scan_config::{FileScanConfig, FileScanConfigBuilder}; -use datafusion_datasource::schema_adapter::DefaultSchemaAdapterFactory; -use datafusion_execution::cache::{ - cache_manager::FileStatisticsCache, cache_unit::DefaultFileStatisticsCache, +use datafusion_datasource::{ + compute_all_files_statistics, + file_groups::FileGroup, + file_scan_config::{FileScanConfig, FileScanConfigBuilder}, + schema_adapter::DefaultSchemaAdapterFactory, +}; +use datafusion_execution::{ + cache::{cache_manager::FileStatisticsCache, cache_unit::DefaultFileStatisticsCache}, + config::SessionConfig, +}; +use datafusion_expr::{ + dml::InsertOp, Expr, SortExpr, TableProviderFilterPushDown, TableType, }; -use datafusion_execution::config::SessionConfig; -use datafusion_expr::dml::InsertOp; -use datafusion_expr::{Expr, SortExpr, TableProviderFilterPushDown, TableType}; use datafusion_physical_expr_common::sort_expr::LexOrdering; -use datafusion_physical_plan::empty::EmptyExec; -use datafusion_physical_plan::{ExecutionPlan, Statistics}; - -use async_trait::async_trait; +use datafusion_physical_plan::{empty::EmptyExec, ExecutionPlan, Statistics}; use futures::{future, stream, Stream, StreamExt, TryStreamExt}; use itertools::Itertools; use object_store::ObjectStore; +use std::{any::Any, collections::HashMap, str::FromStr, sync::Arc}; +/// Indicates the source of the schema for a [`ListingTable`] +/// PartialEq required for assert_eq! in tests Review Comment: I personally think we don't have to explain the `PartialEq` in the user facing documentation. Maybe we could do this as a `//` normal comment ```suggestion // PartialEq required for assert_eq! in tests ``` ########## datafusion/core/src/datasource/listing/table.rs: ########## @@ -17,44 +17,53 @@ //! The table implementation. -use std::collections::HashMap; -use std::{any::Any, str::FromStr, sync::Arc}; - -use super::helpers::{expr_applicable_for_cols, pruned_partition_list}; -use super::{ListingTableUrl, PartitionedFile}; -use crate::datasource::{ - create_ordering, - file_format::{file_compression_type::FileCompressionType, FileFormat}, - physical_plan::FileSinkConfig, +use super::{ + helpers::{expr_applicable_for_cols, pruned_partition_list}, + ListingTableUrl, PartitionedFile, +}; +use crate::{ + datasource::file_format::{file_compression_type::FileCompressionType, FileFormat}, + datasource::{create_ordering, physical_plan::FileSinkConfig}, + execution::context::SessionState, }; -use crate::execution::context::SessionState; - use arrow::datatypes::{DataType, Field, SchemaBuilder, SchemaRef}; use arrow_schema::Schema; +use async_trait::async_trait; use datafusion_catalog::{Session, TableProvider}; -use datafusion_common::stats::Precision; use datafusion_common::{ config_datafusion_err, config_err, internal_err, plan_err, project_schema, - Constraints, DataFusionError, Result, SchemaExt, + stats::Precision, Constraints, DataFusionError, Result, SchemaExt, }; -use datafusion_datasource::compute_all_files_statistics; -use datafusion_datasource::file_groups::FileGroup; -use datafusion_datasource::file_scan_config::{FileScanConfig, FileScanConfigBuilder}; -use datafusion_datasource::schema_adapter::DefaultSchemaAdapterFactory; -use datafusion_execution::cache::{ - cache_manager::FileStatisticsCache, cache_unit::DefaultFileStatisticsCache, +use datafusion_datasource::{ + compute_all_files_statistics, + file_groups::FileGroup, + file_scan_config::{FileScanConfig, FileScanConfigBuilder}, + schema_adapter::DefaultSchemaAdapterFactory, +}; +use datafusion_execution::{ + cache::{cache_manager::FileStatisticsCache, cache_unit::DefaultFileStatisticsCache}, + config::SessionConfig, +}; +use datafusion_expr::{ + dml::InsertOp, Expr, SortExpr, TableProviderFilterPushDown, TableType, }; -use datafusion_execution::config::SessionConfig; -use datafusion_expr::dml::InsertOp; -use datafusion_expr::{Expr, SortExpr, TableProviderFilterPushDown, TableType}; use datafusion_physical_expr_common::sort_expr::LexOrdering; -use datafusion_physical_plan::empty::EmptyExec; -use datafusion_physical_plan::{ExecutionPlan, Statistics}; - -use async_trait::async_trait; +use datafusion_physical_plan::{empty::EmptyExec, ExecutionPlan, Statistics}; use futures::{future, stream, Stream, StreamExt, TryStreamExt}; use itertools::Itertools; use object_store::ObjectStore; +use std::{any::Any, collections::HashMap, str::FromStr, sync::Arc}; +/// Indicates the source of the schema for a [`ListingTable`] +/// PartialEq required for assert_eq! in tests +#[derive(Debug, Clone, PartialEq)] +pub enum SchemaSource { Review Comment: 100% -- this is great cc @adriangb ########## datafusion/core/src/datasource/listing/table.rs: ########## @@ -768,6 +808,9 @@ impl ListingTable { /// /// See documentation and example on [`ListingTable`] and [`ListingTableConfig`] pub fn try_new(config: ListingTableConfig) -> Result<Self> { + // Extract schema_source before moving other parts of the config + let schema_source = config.schema_source().clone(); Review Comment: Minor nit: since SchemaSource is just a bare enum, it might also make sense to `#derive(Copy)` so we didn't have to clone it explicitly. This is totally unecessary I just wanted to mention it as a possibility -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org