This is an automated email from the ASF dual-hosted git repository.

xiaokang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-graphar.git


The following commit(s) were added to refs/heads/main by this push:
     new fe013bc5 feat(C++): remove hardcoded row group size, keep 64M default (#872)
fe013bc5 is described below

commit fe013bc59d058d80742a47c2404c058b53ff6129
Author: Jason <[email protected]>
AuthorDate: Wed Feb 25 14:17:18 2026 +0800

    feat(C++): remove hardcoded row group size, keep 64M default (#872)
---
 cpp/src/graphar/filesystem.cc  | 6 ++++--
 cpp/src/graphar/writer_util.cc | 7 +++++++
 cpp/src/graphar/writer_util.h  | 3 ++-
 3 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/cpp/src/graphar/filesystem.cc b/cpp/src/graphar/filesystem.cc
index 83f9b7dc..e1a5ef56 100644
--- a/cpp/src/graphar/filesystem.cc
+++ b/cpp/src/graphar/filesystem.cc
@@ -266,8 +266,9 @@ Status FileSystem::WriteTableToFile(
   }
   case FileType::PARQUET: {
     auto schema = table->schema();
+    auto row_group_size = options->getParquetMaxRowGroupLength();
     RETURN_NOT_ARROW_OK(parquet::arrow::WriteTable(
-        *table, arrow::default_memory_pool(), output_stream, 64 * 1024 * 1024,
+        *table, arrow::default_memory_pool(), output_stream, row_group_size,
         options->getParquetWriterProperties(),
         options->getArrowWriterProperties()));
     break;
@@ -300,8 +301,9 @@ Status FileSystem::WriteLabelTableToFile(
   parquet::WriterProperties::Builder builder;
   builder.compression(arrow::Compression::type::ZSTD);  // enable compression
   builder.encoding(parquet::Encoding::RLE);
+  auto row_group_size = builder.build()->max_row_group_length();
   RETURN_NOT_ARROW_OK(parquet::arrow::WriteTable(
-      *table, arrow::default_memory_pool(), output_stream, 64 * 1024 * 1024,
+      *table, arrow::default_memory_pool(), output_stream, row_group_size,
       builder.build(), parquet::default_arrow_writer_properties()));
   return Status::OK();
 }
diff --git a/cpp/src/graphar/writer_util.cc b/cpp/src/graphar/writer_util.cc
index 380e3df0..4a2ecd03 100644
--- a/cpp/src/graphar/writer_util.cc
+++ b/cpp/src/graphar/writer_util.cc
@@ -86,6 +86,13 @@ WriterOptions::getParquetWriterProperties() const {
   return builder.build();
 }
 
+int64_t WriterOptions::getParquetMaxRowGroupLength() const {
+  if (parquetOption_) {
+    return parquetOption_->max_row_group_length;
+  }
+  return parquet::WriterProperties::Builder().build()->max_row_group_length();
+}
+
 std::shared_ptr<parquet::ArrowWriterProperties>
 WriterOptions::getArrowWriterProperties() const {
   parquet::ArrowWriterProperties::Builder builder;
diff --git a/cpp/src/graphar/writer_util.h b/cpp/src/graphar/writer_util.h
index 1d2e6b12..457e3b41 100644
--- a/cpp/src/graphar/writer_util.h
+++ b/cpp/src/graphar/writer_util.h
@@ -94,7 +94,7 @@ class WriterOptions {
     std::vector<::parquet::SortingColumn> sorting_columns;
     int64_t dictionary_pagesize_limit = 1024 * 1024;
     int64_t write_batch_size = 1024;
-    int64_t max_row_group_length = 1024 * 1024;
+    int64_t max_row_group_length = 64 * 1024 * 1024;
     int64_t data_pagesize = 1024 * 1024;
     size_t max_statistics_size = 4096;
     int compression_level = std::numeric_limits<int>::min();
@@ -429,6 +429,7 @@ class WriterOptions {
   std::shared_ptr<parquet::WriterProperties> getParquetWriterProperties() const;
   std::shared_ptr<parquet::ArrowWriterProperties> getArrowWriterProperties()
       const;
+  int64_t getParquetMaxRowGroupLength() const;
 #ifdef ARROW_ORC
   arrow::adapters::orc::WriteOptions getOrcOption() const;
 #endif


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to