This is an automated email from the ASF dual-hosted git repository.

xiaokang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-graphar.git


The following commit(s) were added to refs/heads/main by this push:
     new 09703b6f feat(c++): make the c++ code compatible with both Arrow 
17.0.0 and Arrow 21.0.0 (#737)
09703b6f is described below

commit 09703b6fc3ed3499fc8ed1b410647e50c96ec506
Author: Xiaokang Yang <[email protected]>
AuthorDate: Fri Aug 29 16:49:35 2025 +0800

    feat(c++): make the c++ code compatible with both Arrow 17.0.0 and Arrow 
21.0.0 (#737)
    
    * change openfile method
    
    * init arrow compute in latest arrow version
    
    * install arrow with brewfile
    
    * fix
---
 .github/workflows/ci.yml              |  6 ------
 cli/src/util.h                        |  8 ++------
 cpp/Brewfile                          |  1 +
 cpp/README.md                         |  8 --------
 cpp/src/graphar/arrow/chunk_writer.cc |  6 +++++-
 cpp/src/graphar/filesystem.cc         |  6 +++++-
 cpp/src/graphar/util.h                | 25 ++++++++++++++++++++++++-
 cpp/test/test_arrow_chunk_writer.cc   | 29 +++++++++--------------------
 cpp/test/test_builder.cc              | 17 +++++------------
 9 files changed, 51 insertions(+), 55 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 88c6428e..b1a8ff51 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -208,12 +208,6 @@ jobs:
     - name: Install dependencies
       run: |
         brew bundle --file=cpp/Brewfile
-        git clone https://github.com/Homebrew/homebrew-core.git --depth 1 
-        pushd homebrew-core
-        git fetch origin b76848f98196f6dd9d3c4e6f71d030da84d22ce8
-        git checkout b76848f98196f6dd9d3c4e6f71d030da84d22ce8
-        brew install ./Formula/a/apache-arrow.rb
-        popd
         git clone https://github.com/apache/incubator-graphar-testing.git 
$GAR_TEST_DATA --depth 1
     
     - name: Build GraphAr
diff --git a/cli/src/util.h b/cli/src/util.h
index 60026044..1ce39357 100644
--- a/cli/src/util.h
+++ b/cli/src/util.h
@@ -76,14 +76,10 @@ std::shared_ptr<arrow::Table> SelectColumns(
 std::shared_ptr<arrow::Table> GetDataFromParquetFile(
     const std::string& path, const std::vector<std::string>& column_names) {
   // Open the Parquet file
-  auto infile =
-      arrow::io::ReadableFile::Open(path, arrow::default_memory_pool())
-          .ValueOrDie();
-
   // Create a Parquet FileReader
   std::unique_ptr<parquet::arrow::FileReader> parquet_reader;
-  auto status = parquet::arrow::OpenFile(infile, arrow::default_memory_pool(),
-                                         &parquet_reader);
+  auto status = graphar::util::OpenParquetArrowReader(
+      path, arrow::default_memory_pool(), &parquet_reader);
   if (!status.ok()) {
     throw std::runtime_error("Failed to create Parquet FileReader: " +
                              status.ToString());
diff --git a/cpp/Brewfile b/cpp/Brewfile
index 489ef478..889d3c90 100644
--- a/cpp/Brewfile
+++ b/cpp/Brewfile
@@ -17,6 +17,7 @@
 
 brew "cmake"
 brew "google-benchmark"
+brew "apache-arrow"
 brew "boost"
 brew "doxygen"
 brew "git"
diff --git a/cpp/README.md b/cpp/README.md
index 850e7c24..dae16f56 100644
--- a/cpp/README.md
+++ b/cpp/README.md
@@ -56,14 +56,6 @@ On macOS, you can use [Homebrew](https://brew.sh) to install 
the required packag
 ```bash
 brew update && brew bundle --file=cpp/Brewfile
 ```
-and run the following command to install the Arrow 20.0.0_1 C++ libraries:
-```bash
-git clone https://github.com/Homebrew/homebrew-core.git --depth 1 
-cd homebrew-core
-git fetch origin b76848f98196f6dd9d3c4e6f71d030da84d22ce8
-git checkout b76848f98196f6dd9d3c4e6f71d030da84d22ce8
-brew install ./Formula/a/apache-arrow.rb
-```
 
 > [!NOTE]
 > Currently, the Arrow C++ library has [disabled 
 > ARROW_ORC](https://github.com/Homebrew/homebrew-core/blob/4588359b7248b07379094de5310ee7ff89afa17e/Formula/a/apache-arrow.rb#L53)
 >  in the brew formula, so you need to build and install the Arrow C++ library 
 > manually (with `-DARROW_ORC=True`).
diff --git a/cpp/src/graphar/arrow/chunk_writer.cc 
b/cpp/src/graphar/arrow/chunk_writer.cc
index ba949bd2..eea62879 100644
--- a/cpp/src/graphar/arrow/chunk_writer.cc
+++ b/cpp/src/graphar/arrow/chunk_writer.cc
@@ -17,10 +17,11 @@
  * under the License.
  */
 
+#include <arrow/acero/api.h>
 #include <cstddef>
+#include <iostream>
 #include <unordered_map>
 #include <utility>
-
 #include "arrow/api.h"
 #include "arrow/compute/api.h"
 #include "graphar/fwd.h"
@@ -1005,6 +1006,9 @@ Result<std::shared_ptr<arrow::Table>> 
EdgeChunkWriter::getOffsetTable(
 Result<std::shared_ptr<arrow::Table>> EdgeChunkWriter::sortTable(
     const std::shared_ptr<arrow::Table>& input_table,
     const std::string& column_name) {
+#if ARROW_VERSION >= 21000000
+  RETURN_NOT_ARROW_OK(arrow::compute::Initialize());
+#endif
   auto exec_context = arrow::compute::default_exec_context();
   auto plan = arrow_acero_namespace::ExecPlan::Make(exec_context).ValueOrDie();
   auto table_source_options =
diff --git a/cpp/src/graphar/filesystem.cc b/cpp/src/graphar/filesystem.cc
index abc6d975..600ed779 100644
--- a/cpp/src/graphar/filesystem.cc
+++ b/cpp/src/graphar/filesystem.cc
@@ -17,11 +17,13 @@
  * under the License.
  */
 
+#include <iostream>
 #include <memory>
 #include "graphar/writer_util.h"
 #ifdef ARROW_ORC
 #include "arrow/adapters/orc/adapter.h"
 #endif
+#include <arrow/compute/api.h>
 #include "arrow/api.h"
 #include "arrow/csv/api.h"
 #include "arrow/dataset/api.h"
@@ -147,7 +149,9 @@ Result<std::shared_ptr<arrow::Table>> 
FileSystem::ReadFileToTable(
                         arrow::dataset::FileSystemFactoryOptions()));
   GAR_RETURN_ON_ARROW_ERROR_AND_ASSIGN(auto dataset, factory->Finish());
   GAR_RETURN_ON_ARROW_ERROR_AND_ASSIGN(auto scan_builder, dataset->NewScan());
-
+#if ARROW_VERSION >= 21000000
+  RETURN_NOT_ARROW_OK(arrow::compute::Initialize());
+#endif
   // Apply the row filter and select the specified columns
   if (options.filter) {
     GAR_ASSIGN_OR_RAISE(auto filter, options.filter->Evaluate());
diff --git a/cpp/src/graphar/util.h b/cpp/src/graphar/util.h
index c51a03ae..d8da5f3b 100644
--- a/cpp/src/graphar/util.h
+++ b/cpp/src/graphar/util.h
@@ -26,7 +26,16 @@
 #include <vector>
 
 #include "graphar/result.h"
-
+#include "graphar/status.h"
+
+#include "arrow/api.h"
+#include "arrow/csv/api.h"
+#include "arrow/filesystem/api.h"
+#include "arrow/io/api.h"
+#include "arrow/stl.h"
+#include "arrow/util/uri.h"
+#include "parquet/arrow/reader.h"
+#include "parquet/arrow/writer.h"
 #define REGULAR_SEPARATOR "_"
 
 // forward declarations
@@ -250,4 +259,18 @@ struct ValueGetter<std::string> {
   static std::string Value(const void* data, int64_t offset);
 };
 
+static inline arrow::Status OpenParquetArrowReader(
+    const std::string& file_path, arrow::MemoryPool* pool,
+    std::unique_ptr<parquet::arrow::FileReader>* parquet_reader) {
+  std::shared_ptr<arrow::io::RandomAccessFile> input;
+  ARROW_ASSIGN_OR_RAISE(input, arrow::io::ReadableFile::Open(file_path));
+#if defined(ARROW_VERSION) && ARROW_VERSION <= 20000000
+  ARROW_RETURN_NOT_OK(parquet::arrow::OpenFile(input, pool, parquet_reader));
+#else
+  ARROW_ASSIGN_OR_RAISE(auto reader, parquet::arrow::OpenFile(input, pool));
+  *parquet_reader = std::move(reader);
+#endif
+  return arrow::Status::OK();
+}
+
 }  // namespace graphar::util
diff --git a/cpp/test/test_arrow_chunk_writer.cc 
b/cpp/test/test_arrow_chunk_writer.cc
index 03d093dc..d3926cf7 100644
--- a/cpp/test/test_arrow_chunk_writer.cc
+++ b/cpp/test/test_arrow_chunk_writer.cc
@@ -26,6 +26,7 @@
 
 #include "arrow/api.h"
 #include "graphar/label.h"
+#include "graphar/util.h"
 #include "graphar/writer_util.h"
 #ifdef ARROW_ORC
 #include "arrow/adapters/orc/adapter.h"
@@ -140,11 +141,8 @@ TEST_CASE_METHOD(GlobalFixture, 
"TestVertexPropertyWriter") {
     std::shared_ptr<arrow::Table> table1 = maybe_table.ValueOrDie();
 
     // Open Parquet file reader
-    auto fs2 = arrow::fs::FileSystemFromUriOrPath(path2).ValueOrDie();
-    std::shared_ptr<arrow::io::RandomAccessFile> input2 =
-        fs2->OpenInputFile(path2).ValueOrDie();
     std::unique_ptr<parquet::arrow::FileReader> arrow_reader;
-    st = parquet::arrow::OpenFile(input2, pool, &arrow_reader);
+    st = graphar::util::OpenParquetArrowReader(path2, pool, &arrow_reader);
 
     // Read entire file as a single Arrow table
     std::shared_ptr<arrow::Table> table2;
@@ -215,13 +213,9 @@ TEST_CASE_METHOD(GlobalFixture, 
"TestVertexPropertyWriter") {
     // read parquet file
     std::string parquet_file =
         "/tmp/option/vertex/person/firstName_lastName_gender/chunk0";
-    auto parquet_fs =
-        arrow::fs::FileSystemFromUriOrPath(parquet_file).ValueOrDie();
-    std::shared_ptr<arrow::io::RandomAccessFile> parquet_input =
-        parquet_fs->OpenInputFile(parquet_file).ValueOrDie();
     std::unique_ptr<parquet::arrow::FileReader> parquet_reader;
-    auto st = parquet::arrow::OpenFile(
-        parquet_input, arrow::default_memory_pool(), &parquet_reader);
+    auto st = graphar::util::OpenParquetArrowReader(
+        parquet_file, arrow::default_memory_pool(), &parquet_reader);
     REQUIRE(st.ok());
     std::shared_ptr<arrow::Table> parquet_table;
     st = parquet_reader->ReadTable(&parquet_table);
@@ -283,11 +277,8 @@ TEST_CASE_METHOD(GlobalFixture, "TestEdgeChunkWriter") {
   std::string path = test_data_dir +
                      "/ldbc_sample/parquet/edge/person_knows_person/"
                      "unordered_by_source/adj_list/part0/chunk0";
-  auto fs = arrow::fs::FileSystemFromUriOrPath(path).ValueOrDie();
-  std::shared_ptr<arrow::io::RandomAccessFile> input =
-      fs->OpenInputFile(path).ValueOrDie();
   std::unique_ptr<parquet::arrow::FileReader> arrow_reader;
-  st = parquet::arrow::OpenFile(input, pool, &arrow_reader);
+  st = graphar::util::OpenParquetArrowReader(path, pool, &arrow_reader);
   // Read entire file as a single Arrow table
   std::shared_ptr<arrow::Table> maybe_table;
   st = arrow_reader->ReadTable(&maybe_table);
@@ -326,6 +317,7 @@ TEST_CASE_METHOD(GlobalFixture, "TestEdgeChunkWriter") {
     // Write number of vertices
     REQUIRE(writer->WriteVerticesNum(903).ok());
 
+    auto fs = arrow::fs::FileSystemFromUriOrPath("/tmp/edge/").ValueOrDie();
     // Check the number of edges
     std::shared_ptr<arrow::io::InputStream> input2 =
         fs->OpenInputStream(
@@ -395,6 +387,7 @@ TEST_CASE_METHOD(GlobalFixture, "TestEdgeChunkWriter") {
     auto parse_options = arrow::csv::ParseOptions::Defaults();
     parse_options.delimiter = '|';
     auto read_options = arrow::csv::ReadOptions::Defaults();
+    auto fs = arrow::fs::FileSystemFromUriOrPath("/tmp/option/").ValueOrDie();
     std::shared_ptr<arrow::io::InputStream> chunk0_input =
         fs->OpenInputStream(
               
"/tmp/option/edge/person_knows_person/ordered_by_source/adj_list/"
@@ -432,13 +425,9 @@ TEST_CASE_METHOD(GlobalFixture, "TestEdgeChunkWriter") {
     std::string parquet_file =
         
"/tmp/option/edge/person_knows_person/ordered_by_source/adj_list/part0/"
         "chunk0";
-    auto parquet_fs =
-        arrow::fs::FileSystemFromUriOrPath(parquet_file).ValueOrDie();
-    std::shared_ptr<arrow::io::RandomAccessFile> parquet_input =
-        parquet_fs->OpenInputFile(parquet_file).ValueOrDie();
     std::unique_ptr<parquet::arrow::FileReader> parquet_reader;
-    auto st = parquet::arrow::OpenFile(
-        parquet_input, arrow::default_memory_pool(), &parquet_reader);
+    auto st = graphar::util::OpenParquetArrowReader(
+        parquet_file, arrow::default_memory_pool(), &parquet_reader);
     REQUIRE(st.ok());
     std::shared_ptr<arrow::Table> parquet_table;
     st = parquet_reader->ReadTable(&parquet_table);
diff --git a/cpp/test/test_builder.cc b/cpp/test/test_builder.cc
index 2fc9962d..ab1d7364 100644
--- a/cpp/test/test_builder.cc
+++ b/cpp/test/test_builder.cc
@@ -30,6 +30,7 @@
 #include "arrow/io/api.h"
 #include "arrow/stl.h"
 #include "arrow/util/uri.h"
+#include "graphar/util.h"
 #include "parquet/arrow/reader.h"
 #include "parquet/arrow/writer.h"
 
@@ -131,13 +132,9 @@ TEST_CASE_METHOD(GlobalFixture, "Test_vertices_builder") {
   REQUIRE((*ptr) == start_index + builder->GetNum());
   // check parquet file compression
   auto parquet_file = "/tmp/vertex/person/id/chunk0";
-  auto parquet_fs =
-      arrow::fs::FileSystemFromUriOrPath(parquet_file).ValueOrDie();
-  std::shared_ptr<arrow::io::RandomAccessFile> parquet_input =
-      parquet_fs->OpenInputFile(parquet_file).ValueOrDie();
   std::unique_ptr<parquet::arrow::FileReader> parquet_reader;
-  REQUIRE(parquet::arrow::OpenFile(parquet_input, arrow::default_memory_pool(),
-                                   &parquet_reader)
+  REQUIRE(graphar::util::OpenParquetArrowReader(
+              parquet_file, arrow::default_memory_pool(), &parquet_reader)
               .ok());
   std::shared_ptr<arrow::Table> parquet_table;
   REQUIRE(parquet_reader->ReadTable(&parquet_table).ok());
@@ -238,13 +235,9 @@ TEST_CASE_METHOD(GlobalFixture, "test_edges_builder") {
   // check parquet file compression
   auto parquet_file =
       
"/tmp/edge/person_knows_person/ordered_by_dest/creationDate/part0/chunk0";
-  auto parquet_fs =
-      arrow::fs::FileSystemFromUriOrPath(parquet_file).ValueOrDie();
-  std::shared_ptr<arrow::io::RandomAccessFile> parquet_input =
-      parquet_fs->OpenInputFile(parquet_file).ValueOrDie();
   std::unique_ptr<parquet::arrow::FileReader> parquet_reader;
-  REQUIRE(parquet::arrow::OpenFile(parquet_input, arrow::default_memory_pool(),
-                                   &parquet_reader)
+  REQUIRE(graphar::util::OpenParquetArrowReader(
+              parquet_file, arrow::default_memory_pool(), &parquet_reader)
               .ok());
   std::shared_ptr<arrow::Table> parquet_table;
   REQUIRE(parquet_reader->ReadTable(&parquet_table).ok());


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to