alamb commented on code in PR #16166:
URL: https://github.com/apache/datafusion/pull/16166#discussion_r2115541241


##########
datafusion/common/src/config.rs:
##########
@@ -1566,8 +1517,68 @@ impl TableOptions {
     /// # Parameters
     ///
     /// * `format`: The file format to use (e.g., CSV, Parquet).
-    pub fn set_config_format(&mut self, format: ConfigFileType) {
-        self.current_format = Some(format);
+    pub fn get_table_format_options(&self, format: ConfigFileType) -> 
TableFormatOptions {
+        match format {
+            ConfigFileType::CSV => {
+                let options = self.csv.clone();
+                TableFormatOptions::Csv {
+                    options,
+                    extensions: self.extensions.clone(),
+                }
+            }
+            #[cfg(feature = "parquet")]
+            ConfigFileType::PARQUET => {
+                let options = self.parquet.clone();
+                TableFormatOptions::Parquet {
+                    options,
+                    extensions: self.extensions.clone(),
+                }
+            }
+            ConfigFileType::JSON => {
+                let options = self.json.clone();
+                TableFormatOptions::Json {
+                    options,
+                    extensions: self.extensions.clone(),
+                }
+            }
+        }
+    }
+
+    /// Initializes a new `TableOptions` from a hash map of string settings.
+    ///
+    /// # Parameters
+    ///
+    /// * `settings`: A hash map where each key-value pair represents a 
configuration setting.
+    ///
+    /// # Returns
+    ///
+    /// A result containing the new `TableOptions` instance or an error if any 
setting could not be applied.
+    pub fn from_string_hash_map(settings: &HashMap<String, String>) -> 
Result<Self> {
+        let mut ret = Self::default();
+        for (k, v) in settings {
+            ret.set(k, v)?;
+        }
+
+        Ok(ret)
+    }
+
+    /// Modifies the current `TableOptions` instance with settings from a hash 
map.
+    ///
+    /// # Parameters
+    ///
+    /// * `settings`: A hash map where each key-value pair represents a 
configuration setting.
+    ///
+    /// # Returns
+    ///
+    /// A result indicating success or failure in applying the settings.
+    pub fn alter_with_string_hash_map(

Review Comment:
   maybe `update` would be clearer as `alter` might imply some change other 
than updating the map



##########
datafusion/common/src/config.rs:
##########
@@ -1612,42 +1623,241 @@ impl TableOptions {
         };
         e.0.set(key, value)
     }
+}
 
-    /// Initializes a new `TableOptions` from a hash map of string settings.
+#[derive(Debug, Clone)]
+#[allow(clippy::large_enum_variant)]
+pub enum TableFormatOptions {
+    Csv {
+        options: CsvOptions,
+        extensions: Extensions,
+    },
+    #[cfg(feature = "parquet")]
+    Parquet {
+        options: TableParquetOptions,
+        extensions: Extensions,
+    },
+    Json {
+        options: JsonOptions,
+        extensions: Extensions,
+    },
+}
+
+impl ConfigField for TableFormatOptions {
+    /// Visits configuration settings for the current file format, or all 
formats if none is selected.
+    ///
+    /// This method adapts the behavior based on whether a file format is 
currently selected in `current_format`.
+    /// If a format is selected, it visits only the settings relevant to that 
format. Otherwise,
+    /// it visits all available format settings.
+    fn visit<V: Visit>(&self, v: &mut V, _key_prefix: &str, _description: 
&'static str) {
+        match self {
+            #[cfg(feature = "parquet")]
+            TableFormatOptions::Parquet { options, .. } => {
+                options.visit(v, "format", "");
+            }
+            TableFormatOptions::Csv { options, .. } => {
+                options.visit(v, "format", "");
+            }
+            TableFormatOptions::Json { options, .. } => {
+                options.visit(v, "format", "");
+            }
+        }
+    }
+
+    /// Sets a configuration value for a specific key within `TableOptions`.
+    ///
+    /// This method delegates setting configuration values to the specific 
file format configurations,
+    /// based on the current format selected. If no format is selected, it 
returns an error.
     ///
     /// # Parameters
     ///
-    /// * `settings`: A hash map where each key-value pair represents a 
configuration setting.
+    /// * `key`: The configuration key specifying which setting to adjust, 
prefixed with the format (e.g., "format.delimiter")
+    ///   for CSV format.
+    /// * `value`: The value to set for the specified configuration key.
     ///
     /// # Returns
     ///
-    /// A result containing the new `TableOptions` instance or an error if any 
setting could not be applied.
-    pub fn from_string_hash_map(settings: &HashMap<String, String>) -> 
Result<Self> {
-        let mut ret = Self::default();
-        for (k, v) in settings {
-            ret.set(k, v)?;
+    /// A result indicating success or an error if the key is not recognized, 
if a format is not specified,
+    /// or if setting the configuration value fails for the specific format.
+    fn set(&mut self, key: &str, value: &str) -> Result<()> {
+        // Extensions are handled in the public `ConfigOptions::set`
+        let (key, rem) = key.split_once('.').unwrap_or((key, ""));
+        match key {
+            "format" => match self {
+                #[cfg(feature = "parquet")]
+                Self::Parquet { options, .. } => options.set(rem, value),
+                Self::Csv { options, .. } => options.set(rem, value),
+                Self::Json { options, .. } => options.set(rem, value),
+            },
+            _ => _config_err!("Config value \"{key}\" not found on 
TableOptions"),
         }
+    }
+}
 
-        Ok(ret)
+impl TableFormatOptions {
+    /// Constructs a new instance of `TableOptions` with JSON file type and 
default settings.
+    ///
+    /// # Returns
+    ///
+    /// A new `TableOptions` instance with default configuration values.
+    pub fn new_json() -> Self {
+        Self::Json {
+            options: Default::default(),
+            extensions: Default::default(),
+        }
     }
 
-    /// Modifies the current `TableOptions` instance with settings from a hash 
map.
+    /// Constructs a new instance of `TableOptions` with CSV file type and 
default settings.
+    ///
+    /// # Returns
+    ///
+    /// A new `TableOptions` instance with default configuration values.
+    pub fn new_csv() -> Self {
+        Self::Csv {
+            options: Default::default(),
+            extensions: Default::default(),
+        }
+    }
+
+    /// Constructs a new instance of `TableOptions` with Parquet file type and 
default settings.
+    ///
+    /// # Returns
+    ///
+    /// A new `TableOptions` instance with default configuration values.
+    #[cfg(feature = "parquet")]
+    pub fn new_parquet() -> Self {
+        Self::Parquet {
+            options: Default::default(),
+            extensions: Default::default(),
+        }
+    }
+
+    pub fn csv_options_or_default(&self) -> CsvOptions {

Review Comment:
   After reading this some more, I don't undertand the usecase for 
`csv_options_or_default` (and the other similar methods)
   
   Since this is now an enum, the calling code should directly handle the 
format in the TableFormatOptions -- this code will silently drop any config 
options if it isn't CSV which seems like it would potentially mask a bug in the 
caller
   
   I would expect calling code to do something like
   
   ```rust
   match TableFormatOptions { 
     Self::Csv => {}
     Self::Json => {}
     Self::Parquet => {{
   }
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to