adamreeve opened a new pull request, #65: URL: https://github.com/apache/parquet-testing/pull/65
See https://github.com/apache/arrow/issues/45073.

This fixes the encryption test files generated by Arrow C++ so that the repetition levels are correct. I tried to avoid changing other properties of these files like the compression and encodings used.

After applying the change to fix the repetition level (https://github.com/apache/arrow/pull/45074), I generated the files by running the parquet-encryption-test tests with the following changes:

```diff
diff --git a/cpp/src/parquet/encryption/read_configurations_test.cc b/cpp/src/parquet/encryption/read_configurations_test.cc
index f450f9274c..67eb284272 100644
--- a/cpp/src/parquet/encryption/read_configurations_test.cc
+++ b/cpp/src/parquet/encryption/read_configurations_test.cc
@@ -253,7 +253,8 @@ TEST_P(TestDecryptionConfiguration, TestDecryption) {
   const char* param_file_name = std::get<1>(GetParam());
   // Decrypt parquet file that was generated in write_configurations_test.cc test.
   std::string tmp_file_name = "tmp_" + std::string(param_file_name);
-  std::string file_name = temp_dir->path().ToString() + tmp_file_name;
+  //std::string file_name = temp_dir->path().ToString() + tmp_file_name;
+  std::string file_name = "/home/adam/dev/arrow/cpp/submodules/parquet-testing/fixed-data/" + tmp_file_name;
   if (!fexists(file_name)) {
     std::stringstream ss;
     ss << "File " << file_name << " is missing from temporary dir.";
@@ -267,7 +268,7 @@ TEST_P(TestDecryptionConfiguration, TestDecryption) {
     CheckResults(file_name, decryption_config_num, encryption_config_num);
   }
   // Delete temporary test file.
-  ASSERT_EQ(std::remove(file_name.c_str()), 0);
+  //ASSERT_EQ(std::remove(file_name.c_str()), 0);
 
   // Decrypt parquet file that resides in parquet-testing/data directory.
   file_name = data_file(param_file_name);
diff --git a/cpp/src/parquet/encryption/test_encryption_util.cc b/cpp/src/parquet/encryption/test_encryption_util.cc
index cf863da60a..22537f6abd 100644
--- a/cpp/src/parquet/encryption/test_encryption_util.cc
+++ b/cpp/src/parquet/encryption/test_encryption_util.cc
@@ -207,7 +207,9 @@ void FileEncryptor::EncryptFile(
     std::string file,
     std::shared_ptr<parquet::FileEncryptionProperties> encryption_configurations) {
   WriterProperties::Builder prop_builder;
-  prop_builder.compression(parquet::Compression::UNCOMPRESSED);
+  prop_builder.version(ParquetVersion::PARQUET_2_4);
+  prop_builder.encoding(parquet::Encoding::RLE);
+  prop_builder.compression(parquet::Compression::SNAPPY);
   prop_builder.encryption(encryption_configurations);
   prop_builder.enable_write_page_index();
   std::shared_ptr<WriterProperties> writer_properties = prop_builder.build();
diff --git a/cpp/src/parquet/encryption/test_encryption_util.h b/cpp/src/parquet/encryption/test_encryption_util.h
index 9bfc774278..e0941a62f5 100644
--- a/cpp/src/parquet/encryption/test_encryption_util.h
+++ b/cpp/src/parquet/encryption/test_encryption_util.h
@@ -106,7 +106,7 @@ class FileEncryptor {
 
  private:
   std::shared_ptr<schema::GroupNode> SetupEncryptionSchema();
-  int num_rowgroups_ = 5;
+  int num_rowgroups_ = 1;
   int rows_per_rowgroup_ = 50;
   std::shared_ptr<schema::GroupNode> schema_;
 };
diff --git a/cpp/src/parquet/encryption/write_configurations_test.cc b/cpp/src/parquet/encryption/write_configurations_test.cc
index f27da82694..349113dffa 100644
--- a/cpp/src/parquet/encryption/write_configurations_test.cc
+++ b/cpp/src/parquet/encryption/write_configurations_test.cc
@@ -84,7 +84,8 @@ class TestEncryptionConfiguration : public ::testing::Test {
   void EncryptFile(
       std::shared_ptr<parquet::FileEncryptionProperties> encryption_configurations,
       std::string file_name) {
-    std::string file = temp_dir->path().ToString() + file_name;
+    //std::string file = temp_dir->path().ToString() + file_name;
+    std::string file = "/home/adam/dev/arrow/cpp/submodules/parquet-testing/fixed-data/" + file_name;
     encryptor_.EncryptFile(file, encryption_configurations);
   }
 };
```

-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@parquet.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@parquet.apache.org
For additional commands, e-mail: issues-h...@parquet.apache.org