kosiew commented on code in PR #16583:
URL: https://github.com/apache/datafusion/pull/16583#discussion_r2181718449


##########
datafusion/core/src/datasource/listing/table.rs:
##########
@@ -123,25 +176,72 @@ impl ListingTableConfig {
     ///
     /// If the schema is provided, it must contain only the fields in the file
     /// without the table partitioning columns.
+    ///
+    /// # Example: Specifying Table Schema
+    /// ```rust
+    /// # use std::sync::Arc;
+    /// # use datafusion::datasource::listing::{ListingTableConfig, 
ListingOptions, ListingTableUrl};
+    /// # use datafusion::datasource::file_format::parquet::ParquetFormat;
+    /// # use arrow::datatypes::{Schema, Field, DataType};
+    /// # let table_paths = 
ListingTableUrl::parse("file:///path/to/data").unwrap();
+    /// # let listing_options = 
ListingOptions::new(Arc::new(ParquetFormat::default()));
+    /// let schema = Arc::new(Schema::new(vec![
+    ///     Field::new("id", DataType::Int64, false),
+    ///     Field::new("name", DataType::Utf8, true),
+    /// ]));
+    ///
+    /// let config = ListingTableConfig::new(table_paths)
+    ///     .with_listing_options(listing_options)  // Set options first
+    ///     .with_schema(schema);                    // Then set schema
+    /// ```
     pub fn with_schema(self, schema: SchemaRef) -> Self {
+        // Note: We preserve existing options state, but downstream code may 
expect
+        // options to be set. Consider calling with_listing_options() or 
infer_options()
+        // before operations that require options to be present.
+        debug_assert!(
+            self.options.is_some() || cfg!(test),
+            "ListingTableConfig::with_schema called without options set. \
+             Consider calling with_listing_options() or infer_options() first 
to avoid panics in downstream code."
+        );
+
         Self {
-            table_paths: self.table_paths,
             file_schema: Some(schema),
-            options: self.options,
             schema_source: SchemaSource::Specified,
+            ..self

Review Comment:
   This is a minor refactor because I `derive Default` for ListingTableConfig



##########
datafusion/core/src/datasource/listing/table.rs:
##########
@@ -123,25 +176,72 @@ impl ListingTableConfig {
     ///
     /// If the schema is provided, it must contain only the fields in the file
     /// without the table partitioning columns.
+    ///
+    /// # Example: Specifying Table Schema
+    /// ```rust
+    /// # use std::sync::Arc;
+    /// # use datafusion::datasource::listing::{ListingTableConfig, 
ListingOptions, ListingTableUrl};
+    /// # use datafusion::datasource::file_format::parquet::ParquetFormat;
+    /// # use arrow::datatypes::{Schema, Field, DataType};
+    /// # let table_paths = 
ListingTableUrl::parse("file:///path/to/data").unwrap();
+    /// # let listing_options = 
ListingOptions::new(Arc::new(ParquetFormat::default()));
+    /// let schema = Arc::new(Schema::new(vec![
+    ///     Field::new("id", DataType::Int64, false),
+    ///     Field::new("name", DataType::Utf8, true),
+    /// ]));
+    ///
+    /// let config = ListingTableConfig::new(table_paths)
+    ///     .with_listing_options(listing_options)  // Set options first
+    ///     .with_schema(schema);                    // Then set schema
+    /// ```
     pub fn with_schema(self, schema: SchemaRef) -> Self {
+        // Note: We preserve existing options state, but downstream code may 
expect
+        // options to be set. Consider calling with_listing_options() or 
infer_options()
+        // before operations that require options to be present.
+        debug_assert!(
+            self.options.is_some() || cfg!(test),
+            "ListingTableConfig::with_schema called without options set. \
+             Consider calling with_listing_options() or infer_options() first 
to avoid panics in downstream code."
+        );
+
         Self {
-            table_paths: self.table_paths,
             file_schema: Some(schema),
-            options: self.options,
             schema_source: SchemaSource::Specified,
+            ..self
         }
     }
 
     /// Add `listing_options` to [`ListingTableConfig`]
     ///
     /// If not provided, format and other options are inferred via
     /// [`Self::infer_options`].
+    ///
+    /// # Example: Configuring Parquet Files with Custom Options
+    /// ```rust
+    /// # use std::sync::Arc;
+    /// # use datafusion::datasource::listing::{ListingTableConfig, 
ListingOptions, ListingTableUrl};
+    /// # use datafusion::datasource::file_format::parquet::ParquetFormat;
+    /// # let table_paths = 
ListingTableUrl::parse("file:///path/to/data").unwrap();
+    /// let options = ListingOptions::new(Arc::new(ParquetFormat::default()))
+    ///     .with_file_extension(".parquet")
+    ///     .with_collect_stat(true);
+    ///
+    /// let config = ListingTableConfig::new(table_paths)
+    ///     .with_listing_options(options);  // Configure file format and 
options
+    /// ```
     pub fn with_listing_options(self, listing_options: ListingOptions) -> Self 
{
+        // Note: This method properly sets options, but be aware that 
downstream
+        // methods like infer_schema() and try_new() require both schema and 
options
+        // to be set to function correctly.
+        debug_assert!(
+            !self.table_paths.is_empty() || cfg!(test),
+            "ListingTableConfig::with_listing_options called without 
table_paths set. \
+             Consider calling new() or new_with_multi_paths() first to 
establish table paths."
+        );
+
         Self {
-            table_paths: self.table_paths,
-            file_schema: self.file_schema,
             options: Some(listing_options),
-            schema_source: self.schema_source,
+            ..self
         }
     }

Review Comment:
   This is a minor refactor because I `derive Default` for ListingTableConfig



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to