alamb commented on code in PR #16166: URL: https://github.com/apache/datafusion/pull/16166#discussion_r2115541241
########## datafusion/common/src/config.rs: ########## @@ -1566,8 +1517,68 @@ impl TableOptions { /// # Parameters /// /// * `format`: The file format to use (e.g., CSV, Parquet). - pub fn set_config_format(&mut self, format: ConfigFileType) { - self.current_format = Some(format); + pub fn get_table_format_options(&self, format: ConfigFileType) -> TableFormatOptions { + match format { + ConfigFileType::CSV => { + let options = self.csv.clone(); + TableFormatOptions::Csv { + options, + extensions: self.extensions.clone(), + } + } + #[cfg(feature = "parquet")] + ConfigFileType::PARQUET => { + let options = self.parquet.clone(); + TableFormatOptions::Parquet { + options, + extensions: self.extensions.clone(), + } + } + ConfigFileType::JSON => { + let options = self.json.clone(); + TableFormatOptions::Json { + options, + extensions: self.extensions.clone(), + } + } + } + } + + /// Initializes a new `TableOptions` from a hash map of string settings. + /// + /// # Parameters + /// + /// * `settings`: A hash map where each key-value pair represents a configuration setting. + /// + /// # Returns + /// + /// A result containing the new `TableOptions` instance or an error if any setting could not be applied. + pub fn from_string_hash_map(settings: &HashMap<String, String>) -> Result<Self> { + let mut ret = Self::default(); + for (k, v) in settings { + ret.set(k, v)?; + } + + Ok(ret) + } + + /// Modifies the current `TableOptions` instance with settings from a hash map. + /// + /// # Parameters + /// + /// * `settings`: A hash map where each key-value pair represents a configuration setting. + /// + /// # Returns + /// + /// A result indicating success or failure in applying the settings. + pub fn alter_with_string_hash_map( Review Comment: maybe `update` would be clearer as `alter` might imply some change other than updating the map ########## datafusion/common/src/config.rs: ########## @@ -1612,42 +1623,241 @@ impl TableOptions { }; e.0.set(key, value) } +} - /// Initializes a new `TableOptions` from a hash map of string settings. +#[derive(Debug, Clone)] +#[allow(clippy::large_enum_variant)] +pub enum TableFormatOptions { + Csv { + options: CsvOptions, + extensions: Extensions, + }, + #[cfg(feature = "parquet")] + Parquet { + options: TableParquetOptions, + extensions: Extensions, + }, + Json { + options: JsonOptions, + extensions: Extensions, + }, +} + +impl ConfigField for TableFormatOptions { + /// Visits configuration settings for the current file format, or all formats if none is selected. + /// + /// This method adapts the behavior based on whether a file format is currently selected in `current_format`. + /// If a format is selected, it visits only the settings relevant to that format. Otherwise, + /// it visits all available format settings. + fn visit<V: Visit>(&self, v: &mut V, _key_prefix: &str, _description: &'static str) { + match self { + #[cfg(feature = "parquet")] + TableFormatOptions::Parquet { options, .. } => { + options.visit(v, "format", ""); + } + TableFormatOptions::Csv { options, .. } => { + options.visit(v, "format", ""); + } + TableFormatOptions::Json { options, .. } => { + options.visit(v, "format", ""); + } + } + } + + /// Sets a configuration value for a specific key within `TableOptions`. + /// + /// This method delegates setting configuration values to the specific file format configurations, + /// based on the current format selected. If no format is selected, it returns an error. /// /// # Parameters /// - /// * `settings`: A hash map where each key-value pair represents a configuration setting. + /// * `key`: The configuration key specifying which setting to adjust, prefixed with the format (e.g., "format.delimiter") + /// for CSV format. + /// * `value`: The value to set for the specified configuration key. /// /// # Returns /// - /// A result containing the new `TableOptions` instance or an error if any setting could not be applied. - pub fn from_string_hash_map(settings: &HashMap<String, String>) -> Result<Self> { - let mut ret = Self::default(); - for (k, v) in settings { - ret.set(k, v)?; + /// A result indicating success or an error if the key is not recognized, if a format is not specified, + /// or if setting the configuration value fails for the specific format. + fn set(&mut self, key: &str, value: &str) -> Result<()> { + // Extensions are handled in the public `ConfigOptions::set` + let (key, rem) = key.split_once('.').unwrap_or((key, "")); + match key { + "format" => match self { + #[cfg(feature = "parquet")] + Self::Parquet { options, .. } => options.set(rem, value), + Self::Csv { options, .. } => options.set(rem, value), + Self::Json { options, .. } => options.set(rem, value), + }, + _ => _config_err!("Config value \"{key}\" not found on TableOptions"), } + } +} - Ok(ret) +impl TableFormatOptions { + /// Constructs a new instance of `TableOptions` with JSON file type and default settings. + /// + /// # Returns + /// + /// A new `TableOptions` instance with default configuration values. + pub fn new_json() -> Self { + Self::Json { + options: Default::default(), + extensions: Default::default(), + } } - /// Modifies the current `TableOptions` instance with settings from a hash map. + /// Constructs a new instance of `TableOptions` with CSV file type and default settings. + /// + /// # Returns + /// + /// A new `TableOptions` instance with default configuration values. + pub fn new_csv() -> Self { + Self::Csv { + options: Default::default(), + extensions: Default::default(), + } + } + + /// Constructs a new instance of `TableOptions` with Parquet file type and default settings. + /// + /// # Returns + /// + /// A new `TableOptions` instance with default configuration values. + #[cfg(feature = "parquet")] + pub fn new_parquet() -> Self { + Self::Parquet { + options: Default::default(), + extensions: Default::default(), + } + } + + pub fn csv_options_or_default(&self) -> CsvOptions { Review Comment: After reading this some more, I don't undertand the usecase for `csv_options_or_default` (and the other similar methods) Since this is now an enum, the calling code should directly handle the format in the TableFormatOptions -- this code will silently drop any config options if it isn't CSV which seems like it would potentially mask a bug in the caller I would expect calling code to do something like ```rust match TableFormatOptions { Self::Csv => {} Self::Json => {} Self::Parquet => {{ } ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org