kosiew commented on code in PR #16583: URL: https://github.com/apache/datafusion/pull/16583#discussion_r2181718449
########## datafusion/core/src/datasource/listing/table.rs: ########## @@ -123,25 +176,72 @@ impl ListingTableConfig { /// /// If the schema is provided, it must contain only the fields in the file /// without the table partitioning columns. + /// + /// # Example: Specifying Table Schema + /// ```rust + /// # use std::sync::Arc; + /// # use datafusion::datasource::listing::{ListingTableConfig, ListingOptions, ListingTableUrl}; + /// # use datafusion::datasource::file_format::parquet::ParquetFormat; + /// # use arrow::datatypes::{Schema, Field, DataType}; + /// # let table_paths = ListingTableUrl::parse("file:///path/to/data").unwrap(); + /// # let listing_options = ListingOptions::new(Arc::new(ParquetFormat::default())); + /// let schema = Arc::new(Schema::new(vec![ + /// Field::new("id", DataType::Int64, false), + /// Field::new("name", DataType::Utf8, true), + /// ])); + /// + /// let config = ListingTableConfig::new(table_paths) + /// .with_listing_options(listing_options) // Set options first + /// .with_schema(schema); // Then set schema + /// ``` pub fn with_schema(self, schema: SchemaRef) -> Self { + // Note: We preserve existing options state, but downstream code may expect + // options to be set. Consider calling with_listing_options() or infer_options() + // before operations that require options to be present. + debug_assert!( + self.options.is_some() || cfg!(test), + "ListingTableConfig::with_schema called without options set. \ + Consider calling with_listing_options() or infer_options() first to avoid panics in downstream code." + ); + Self { - table_paths: self.table_paths, file_schema: Some(schema), - options: self.options, schema_source: SchemaSource::Specified, + ..self Review Comment: with_schema is required so that SchemaAdapter knows which target schema to adapt to. I added debug_assert to ensure future changes do not unwittingly change self.options to be None. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org