This is an automated email from the ASF dual-hosted git repository.

raulcd pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 560ef02158 GH-46600: [C++][CI] Add job with ARROW_LARGE_MEMORY_TESTS 
enabled (#49490)
560ef02158 is described below

commit 560ef02158131487b54cfb1f3883def10d9a67a7
Author: Raúl Cumplido <[email protected]>
AuthorDate: Thu Mar 26 09:33:44 2026 +0100

    GH-46600: [C++][CI] Add job with ARROW_LARGE_MEMORY_TESTS enabled (#49490)
    
    ### Rationale for this change
    
    Now that we have self-hosted runners on AWS, we should run the tests gated by `ARROW_LARGE_MEMORY_TESTS` on CI.
    
    ### What changes are included in this PR?
    
    Added a new runner for ARROW_LARGE_MEMORY_TESTS.
    Fixed `parquet-writer-test` to generate the expected huge page by setting a huge `max_rows_per_page` instead of relying on the default.
    
    ### Are these changes tested?
    
    Yes, via CI.
    
    ### Are there any user-facing changes?
    
    No
    
    * GitHub Issue: #46600
    
    Authored-by: Raúl Cumplido <[email protected]>
    Signed-off-by: Raúl Cumplido <[email protected]>
---
 .github/workflows/cpp_extra.yml       | 10 ++++++++++
 cpp/src/parquet/column_writer_test.cc | 12 ++++++++----
 2 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/cpp_extra.yml b/.github/workflows/cpp_extra.yml
index 8ebf3b9dfb..f13b01041a 100644
--- a/.github/workflows/cpp_extra.yml
+++ b/.github/workflows/cpp_extra.yml
@@ -107,6 +107,16 @@ jobs:
           - image: alpine-linux-cpp
             runs-on: ubuntu-latest
             title: AMD64 Alpine Linux
+          - image: ubuntu-cpp
+            run-options: >-
+              -e ARROW_CTEST_TIMEOUT=2000
+              -e ARROW_C_FLAGS_DEBUG="-O1"
+              -e ARROW_CXX_FLAGS_DEBUG="-O1"
+              -e ARROW_GANDIVA=OFF
+              -e ARROW_LARGE_MEMORY_TESTS=ON
+              -e BUILD_WARNING_LEVEL=PRODUCTION
+            runs-on: "runs-on=${{ github.run_id 
}}/family=x8i.2xlarge/volume=80gb/spot=capacity-optimized"
+            title: AMD64 Ubuntu Large Memory Tests
           - image: conda-cpp
             run-options: >-
               -e ARROW_USE_MESON=ON
diff --git a/cpp/src/parquet/column_writer_test.cc 
b/cpp/src/parquet/column_writer_test.cc
index 157e73ffec..a453949172 100644
--- a/cpp/src/parquet/column_writer_test.cc
+++ b/cpp/src/parquet/column_writer_test.cc
@@ -1053,8 +1053,10 @@ TEST(TestColumnWriter, 
LARGE_MEMORY_TEST(WriteLargeDictEncodedPage)) {
                       {
                           PrimitiveNode::Make("item", Repetition::REQUIRED, 
Type::INT32),
                       }));
-  auto properties =
-      WriterProperties::Builder().data_pagesize(1024 * 1024 * 1024)->build();
+  auto properties = WriterProperties::Builder()
+                        .data_pagesize(1024 * 1024 * 1024)
+                        
->max_rows_per_page(std::numeric_limits<int64_t>::max())
+                        ->build();
   auto file_writer = ParquetFileWriter::Open(sink, schema, properties);
   auto rg_writer = file_writer->AppendRowGroup();
 
@@ -1124,8 +1126,10 @@ TEST(TestColumnWriter, 
LARGE_MEMORY_TEST(ThrowsOnDictIndicesTooLarge)) {
                       {
                           PrimitiveNode::Make("item", Repetition::REQUIRED, 
Type::INT32),
                       }));
-  auto properties =
-      WriterProperties::Builder().data_pagesize(4 * 1024LL * 1024 * 
1024)->build();
+  auto properties = WriterProperties::Builder()
+                        .data_pagesize(4 * 1024LL * 1024 * 1024)
+                        
->max_rows_per_page(std::numeric_limits<int64_t>::max())
+                        ->build();
   auto file_writer = ParquetFileWriter::Open(sink, schema, properties);
   auto rg_writer = file_writer->AppendRowGroup();
 

Reply via email to