corwinjoy commented on code in PR #16351: URL: https://github.com/apache/datafusion/pull/16351#discussion_r2136718671
########## datafusion/common/src/config.rs: ########## @@ -591,6 +930,12 @@ config_namespace! { /// writing out already in-memory data, such as from a cached /// data frame. pub maximum_buffered_record_batches_per_stream: usize, default = 2 + + /// Optional file decryption properties + pub file_decryption_properties: Option<ConfigFileDecryptionProperties>, default = None + + /// Optional file encryption properties + pub file_encryption_properties: Option<ConfigFileEncryptionProperties>, default = None Review Comment: @adamreeve and I are not completely sure where this should go. On the session context there's only a way to set the "global" `ParquetOptions` but not `TableParquetOptions`, which contains extra table-specific settings. It does feel a bit wrong to put file-specific decryption properties in the execution context (see later examples). E.g., if users were reading two different encrypted Parquet files in one query, they might need to set different decryption properties for each file, so setting them in the execution context wouldn't work. At the moment, I think this scenario would require creating separate listing tables and specifying `TableParquetOptions`. That's an edge case, so maybe I'm overthinking this, but maybe being able to set file decryption properties in `ParquetReadOptions` would be a good idea? This doesn't really fit all that well with the reader options that Parquet has, though. 
########## datafusion/common/src/config.rs: ########## @@ -2231,6 +2575,130 @@ mod tests { ); } + #[cfg(feature = "parquet")] + #[test] + fn parquet_table_encryption() { + use crate::config::{ + ConfigFileDecryptionProperties, ConfigFileEncryptionProperties, + }; + use parquet::encryption::decrypt::FileDecryptionProperties; + use parquet::encryption::encrypt::FileEncryptionProperties; + + let footer_key = b"0123456789012345".to_vec(); // 128bit/16 + let column_names = vec!["double_field", "float_field"]; + let column_keys = + vec![b"1234567890123450".to_vec(), b"1234567890123451".to_vec()]; + + let file_encryption_properties = + FileEncryptionProperties::builder(footer_key.clone()) + .with_column_keys(column_names.clone(), column_keys.clone()) + .unwrap() + .build() + .unwrap(); + + let decryption_properties = FileDecryptionProperties::builder(footer_key.clone()) + .with_column_keys(column_names.clone(), column_keys.clone()) + .unwrap() + .build() + .unwrap(); + + // Test round-trip + let config_encrypt: ConfigFileEncryptionProperties = + (&file_encryption_properties).into(); + let encryption_properties_built: FileEncryptionProperties = + config_encrypt.clone().into(); + assert_eq!(file_encryption_properties, encryption_properties_built); + + let config_decrypt: ConfigFileDecryptionProperties = + (&decryption_properties).into(); + let decryption_properties_built: FileDecryptionProperties = + config_decrypt.clone().into(); + assert_eq!(decryption_properties, decryption_properties_built); + + /////////////////////////////////////////////////////////////////////////////////// + // Test encryption config + + // Display original encryption config + // println!("{:#?}", config_encrypt); + + let mut table_config = TableOptions::new(); + table_config.set_config_format(ConfigFileType::PARQUET); + table_config + .parquet + .set( + "file_encryption_properties.encrypt_footer", + config_encrypt.encrypt_footer.to_string().as_str(), + ) + .unwrap(); + table_config + .parquet + 
.set( + "file_encryption_properties.footer_key_as_hex", + config_encrypt.footer_key_as_hex.as_str(), + ) + .unwrap(); + + for (i, col_name) in column_names.iter().enumerate() { + let key = format!("file_encryption_properties.column_keys_as_hex.{col_name}"); Review Comment: Note the use of '.' as the separator before the column name, as mentioned above. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org