alamb commented on code in PR #10224:
URL: https://github.com/apache/datafusion/pull/10224#discussion_r1580182058
##########
datafusion/common/src/config.rs:
##########
@@ -1773,4 +1808,38 @@ mod tests {
.iter()
.any(|item| item.key == "format.bloom_filter_enabled::col1"))
}
+
+ #[cfg(feature = "parquet")]
+ #[test]
+ fn parquet_table_options_config_metadata_entry() {
+ let mut table_config = TableOptions::new();
+ table_config.set_file_format(FileType::PARQUET);
+ table_config.set("format.metadata::key1", "").unwrap();
+ table_config.set("format.metadata::key2", "value2").unwrap();
+ table_config
+ .set("format.metadata::key3", "value with spaces ")
+ .unwrap();
+ table_config
+ .set("format.metadata::key4", "value with special chars :: :")
Review Comment:
nice
##########
datafusion/sqllogictest/test_files/copy.slt:
##########
@@ -283,11 +283,73 @@ OPTIONS (
'format.statistics_enabled::col2' none,
'format.max_statistics_size' 123,
'format.bloom_filter_fpp' 0.001,
-'format.bloom_filter_ndv' 100
+'format.bloom_filter_ndv' 100,
+'format.metadata::key' 'value'
)
----
2
+# valid vs invalid metadata
+
+# accepts map with a single entry
+statement ok
+COPY source_table
+TO 'test_files/scratch/copy/table_with_metadata/'
+STORED AS PARQUET
+OPTIONS (
+ 'format.metadata::key' 'value'
+)
+
+# accepts multiple entries (on different keys)
+statement ok
+COPY source_table
+TO 'test_files/scratch/copy/table_with_metadata/'
+STORED AS PARQUET
+OPTIONS (
+ 'format.metadata::key1' '',
+ 'format.metadata::key2' 'value',
+ 'format.metadata::key3' 'value with spaces',
+ 'format.metadata::key4' 'value with special chars :: :'
+)
+
+# accepts multiple entries with the same key (will overwrite)
+statement ok
+COPY source_table
+TO 'test_files/scratch/copy/table_with_metadata/'
+STORED AS PARQUET
+OPTIONS (
+ 'format.metadata::key1' 'value',
+ 'format.metadata::key1' 'value'
+)
Review Comment:
I agree there is no need to change it in this PR
##########
datafusion/common/src/config.rs:
##########
@@ -1364,12 +1364,31 @@ impl TableOptions {
/// Options that control how Parquet files are read, including global options
/// that apply to all columns and optional column-specific overrides
+///
+/// Closely tied to [`ParquetWriterOptions`](crate::file_options::parquet_writer::ParquetWriterOptions).
+/// Properties not included in [`TableParquetOptions`] may not be configurable at the external API
+/// (e.g. sorting_columns).
#[derive(Clone, Default, Debug, PartialEq)]
pub struct TableParquetOptions {
/// Global Parquet options that propagates to all columns.
pub global: ParquetOptions,
/// Column specific options. Default usage is parquet.XX::column.
pub column_specific_options: HashMap<String, ColumnOptions>,
+ /// Additional file-level metadata to include. Inserted into the key_value_metadata
+ /// for the written [`FileMetaData`](https://docs.rs/parquet/latest/parquet/file/metadata/struct.FileMetaData.html).
+ ///
+ /// Multiple entries are permitted
+ /// ```sql
+ /// OPTIONS (
+ /// 'format.metadata::key1' '',
Review Comment:
👍
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]