This is an automated email from the ASF dual-hosted git repository.
xiaokang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-graphar.git
The following commit(s) were added to refs/heads/main by this push:
new 09703b6f feat(c++): make the c++ code compatible with both Arrow
17.0.0 and Arrow 21.0.0 (#737)
09703b6f is described below
commit 09703b6fc3ed3499fc8ed1b410647e50c96ec506
Author: Xiaokang Yang <[email protected]>
AuthorDate: Fri Aug 29 16:49:35 2025 +0800
feat(c++): make the c++ code compatible with both Arrow 17.0.0 and Arrow
21.0.0 (#737)
* change openfile method
* init arrow compute in latest arrow version
* install arrow with brewfile
* fix
---
.github/workflows/ci.yml | 6 ------
cli/src/util.h | 8 ++------
cpp/Brewfile | 1 +
cpp/README.md | 8 --------
cpp/src/graphar/arrow/chunk_writer.cc | 6 +++++-
cpp/src/graphar/filesystem.cc | 6 +++++-
cpp/src/graphar/util.h | 25 ++++++++++++++++++++++++-
cpp/test/test_arrow_chunk_writer.cc | 29 +++++++++--------------------
cpp/test/test_builder.cc | 17 +++++------------
9 files changed, 51 insertions(+), 55 deletions(-)
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 88c6428e..b1a8ff51 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -208,12 +208,6 @@ jobs:
- name: Install dependencies
run: |
brew bundle --file=cpp/Brewfile
- git clone https://github.com/Homebrew/homebrew-core.git --depth 1
- pushd homebrew-core
- git fetch origin b76848f98196f6dd9d3c4e6f71d030da84d22ce8
- git checkout b76848f98196f6dd9d3c4e6f71d030da84d22ce8
- brew install ./Formula/a/apache-arrow.rb
- popd
git clone https://github.com/apache/incubator-graphar-testing.git
$GAR_TEST_DATA --depth 1
- name: Build GraphAr
diff --git a/cli/src/util.h b/cli/src/util.h
index 60026044..1ce39357 100644
--- a/cli/src/util.h
+++ b/cli/src/util.h
@@ -76,14 +76,10 @@ std::shared_ptr<arrow::Table> SelectColumns(
std::shared_ptr<arrow::Table> GetDataFromParquetFile(
const std::string& path, const std::vector<std::string>& column_names) {
// Open the Parquet file
- auto infile =
- arrow::io::ReadableFile::Open(path, arrow::default_memory_pool())
- .ValueOrDie();
-
// Create a Parquet FileReader
std::unique_ptr<parquet::arrow::FileReader> parquet_reader;
- auto status = parquet::arrow::OpenFile(infile, arrow::default_memory_pool(),
- &parquet_reader);
+ auto status = graphar::util::OpenParquetArrowReader(
+ path, arrow::default_memory_pool(), &parquet_reader);
if (!status.ok()) {
throw std::runtime_error("Failed to create Parquet FileReader: " +
status.ToString());
diff --git a/cpp/Brewfile b/cpp/Brewfile
index 489ef478..889d3c90 100644
--- a/cpp/Brewfile
+++ b/cpp/Brewfile
@@ -17,6 +17,7 @@
brew "cmake"
brew "google-benchmark"
+brew "apache-arrow"
brew "boost"
brew "doxygen"
brew "git"
diff --git a/cpp/README.md b/cpp/README.md
index 850e7c24..dae16f56 100644
--- a/cpp/README.md
+++ b/cpp/README.md
@@ -56,14 +56,6 @@ On macOS, you can use [Homebrew](https://brew.sh) to install
the required packag
```bash
brew update && brew bundle --file=cpp/Brewfile
```
-and run the following command to install the Arrow 20.0.0_1 C++ libraries:
-```bash
-git clone https://github.com/Homebrew/homebrew-core.git --depth 1
-cd homebrew-core
-git fetch origin b76848f98196f6dd9d3c4e6f71d030da84d22ce8
-git checkout b76848f98196f6dd9d3c4e6f71d030da84d22ce8
-brew install ./Formula/a/apache-arrow.rb
-```
> [!NOTE]
> Currently, the Arrow C++ library has [disabled
> ARROW_ORC](https://github.com/Homebrew/homebrew-core/blob/4588359b7248b07379094de5310ee7ff89afa17e/Formula/a/apache-arrow.rb#L53)
> in the brew formula, so you need to build and install the Arrow C++ library
> manually (with `-DARROW_ORC=True`).
diff --git a/cpp/src/graphar/arrow/chunk_writer.cc
b/cpp/src/graphar/arrow/chunk_writer.cc
index ba949bd2..eea62879 100644
--- a/cpp/src/graphar/arrow/chunk_writer.cc
+++ b/cpp/src/graphar/arrow/chunk_writer.cc
@@ -17,10 +17,11 @@
* under the License.
*/
+#include <arrow/acero/api.h>
#include <cstddef>
+#include <iostream>
#include <unordered_map>
#include <utility>
-
#include "arrow/api.h"
#include "arrow/compute/api.h"
#include "graphar/fwd.h"
@@ -1005,6 +1006,9 @@ Result<std::shared_ptr<arrow::Table>>
EdgeChunkWriter::getOffsetTable(
Result<std::shared_ptr<arrow::Table>> EdgeChunkWriter::sortTable(
const std::shared_ptr<arrow::Table>& input_table,
const std::string& column_name) {
+#if ARROW_VERSION >= 21000000
+ RETURN_NOT_ARROW_OK(arrow::compute::Initialize());
+#endif
auto exec_context = arrow::compute::default_exec_context();
auto plan = arrow_acero_namespace::ExecPlan::Make(exec_context).ValueOrDie();
auto table_source_options =
diff --git a/cpp/src/graphar/filesystem.cc b/cpp/src/graphar/filesystem.cc
index abc6d975..600ed779 100644
--- a/cpp/src/graphar/filesystem.cc
+++ b/cpp/src/graphar/filesystem.cc
@@ -17,11 +17,13 @@
* under the License.
*/
+#include <iostream>
#include <memory>
#include "graphar/writer_util.h"
#ifdef ARROW_ORC
#include "arrow/adapters/orc/adapter.h"
#endif
+#include <arrow/compute/api.h>
#include "arrow/api.h"
#include "arrow/csv/api.h"
#include "arrow/dataset/api.h"
@@ -147,7 +149,9 @@ Result<std::shared_ptr<arrow::Table>>
FileSystem::ReadFileToTable(
arrow::dataset::FileSystemFactoryOptions()));
GAR_RETURN_ON_ARROW_ERROR_AND_ASSIGN(auto dataset, factory->Finish());
GAR_RETURN_ON_ARROW_ERROR_AND_ASSIGN(auto scan_builder, dataset->NewScan());
-
+#if ARROW_VERSION >= 21000000
+ RETURN_NOT_ARROW_OK(arrow::compute::Initialize());
+#endif
// Apply the row filter and select the specified columns
if (options.filter) {
GAR_ASSIGN_OR_RAISE(auto filter, options.filter->Evaluate());
diff --git a/cpp/src/graphar/util.h b/cpp/src/graphar/util.h
index c51a03ae..d8da5f3b 100644
--- a/cpp/src/graphar/util.h
+++ b/cpp/src/graphar/util.h
@@ -26,7 +26,16 @@
#include <vector>
#include "graphar/result.h"
-
+#include "graphar/status.h"
+
+#include "arrow/api.h"
+#include "arrow/csv/api.h"
+#include "arrow/filesystem/api.h"
+#include "arrow/io/api.h"
+#include "arrow/stl.h"
+#include "arrow/util/uri.h"
+#include "parquet/arrow/reader.h"
+#include "parquet/arrow/writer.h"
#define REGULAR_SEPARATOR "_"
// forward declarations
@@ -250,4 +259,18 @@ struct ValueGetter<std::string> {
static std::string Value(const void* data, int64_t offset);
};
+static inline arrow::Status OpenParquetArrowReader(
+ const std::string& file_path, arrow::MemoryPool* pool,
+ std::unique_ptr<parquet::arrow::FileReader>* parquet_reader) {
+ std::shared_ptr<arrow::io::RandomAccessFile> input;
+ ARROW_ASSIGN_OR_RAISE(input, arrow::io::ReadableFile::Open(file_path));
+#if defined(ARROW_VERSION) && ARROW_VERSION <= 20000000
+ ARROW_RETURN_NOT_OK(parquet::arrow::OpenFile(input, pool, parquet_reader));
+#else
+ ARROW_ASSIGN_OR_RAISE(auto reader, parquet::arrow::OpenFile(input, pool));
+ *parquet_reader = std::move(reader);
+#endif
+ return arrow::Status::OK();
+}
+
} // namespace graphar::util
diff --git a/cpp/test/test_arrow_chunk_writer.cc
b/cpp/test/test_arrow_chunk_writer.cc
index 03d093dc..d3926cf7 100644
--- a/cpp/test/test_arrow_chunk_writer.cc
+++ b/cpp/test/test_arrow_chunk_writer.cc
@@ -26,6 +26,7 @@
#include "arrow/api.h"
#include "graphar/label.h"
+#include "graphar/util.h"
#include "graphar/writer_util.h"
#ifdef ARROW_ORC
#include "arrow/adapters/orc/adapter.h"
@@ -140,11 +141,8 @@ TEST_CASE_METHOD(GlobalFixture,
"TestVertexPropertyWriter") {
std::shared_ptr<arrow::Table> table1 = maybe_table.ValueOrDie();
// Open Parquet file reader
- auto fs2 = arrow::fs::FileSystemFromUriOrPath(path2).ValueOrDie();
- std::shared_ptr<arrow::io::RandomAccessFile> input2 =
- fs2->OpenInputFile(path2).ValueOrDie();
std::unique_ptr<parquet::arrow::FileReader> arrow_reader;
- st = parquet::arrow::OpenFile(input2, pool, &arrow_reader);
+ st = graphar::util::OpenParquetArrowReader(path2, pool, &arrow_reader);
// Read entire file as a single Arrow table
std::shared_ptr<arrow::Table> table2;
@@ -215,13 +213,9 @@ TEST_CASE_METHOD(GlobalFixture,
"TestVertexPropertyWriter") {
// read parquet file
std::string parquet_file =
"/tmp/option/vertex/person/firstName_lastName_gender/chunk0";
- auto parquet_fs =
- arrow::fs::FileSystemFromUriOrPath(parquet_file).ValueOrDie();
- std::shared_ptr<arrow::io::RandomAccessFile> parquet_input =
- parquet_fs->OpenInputFile(parquet_file).ValueOrDie();
std::unique_ptr<parquet::arrow::FileReader> parquet_reader;
- auto st = parquet::arrow::OpenFile(
- parquet_input, arrow::default_memory_pool(), &parquet_reader);
+ auto st = graphar::util::OpenParquetArrowReader(
+ parquet_file, arrow::default_memory_pool(), &parquet_reader);
REQUIRE(st.ok());
std::shared_ptr<arrow::Table> parquet_table;
st = parquet_reader->ReadTable(&parquet_table);
@@ -283,11 +277,8 @@ TEST_CASE_METHOD(GlobalFixture, "TestEdgeChunkWriter") {
std::string path = test_data_dir +
"/ldbc_sample/parquet/edge/person_knows_person/"
"unordered_by_source/adj_list/part0/chunk0";
- auto fs = arrow::fs::FileSystemFromUriOrPath(path).ValueOrDie();
- std::shared_ptr<arrow::io::RandomAccessFile> input =
- fs->OpenInputFile(path).ValueOrDie();
std::unique_ptr<parquet::arrow::FileReader> arrow_reader;
- st = parquet::arrow::OpenFile(input, pool, &arrow_reader);
+ st = graphar::util::OpenParquetArrowReader(path, pool, &arrow_reader);
// Read entire file as a single Arrow table
std::shared_ptr<arrow::Table> maybe_table;
st = arrow_reader->ReadTable(&maybe_table);
@@ -326,6 +317,7 @@ TEST_CASE_METHOD(GlobalFixture, "TestEdgeChunkWriter") {
// Write number of vertices
REQUIRE(writer->WriteVerticesNum(903).ok());
+ auto fs = arrow::fs::FileSystemFromUriOrPath("/tmp/edge/").ValueOrDie();
// Check the number of edges
std::shared_ptr<arrow::io::InputStream> input2 =
fs->OpenInputStream(
@@ -395,6 +387,7 @@ TEST_CASE_METHOD(GlobalFixture, "TestEdgeChunkWriter") {
auto parse_options = arrow::csv::ParseOptions::Defaults();
parse_options.delimiter = '|';
auto read_options = arrow::csv::ReadOptions::Defaults();
+ auto fs = arrow::fs::FileSystemFromUriOrPath("/tmp/option/").ValueOrDie();
std::shared_ptr<arrow::io::InputStream> chunk0_input =
fs->OpenInputStream(
"/tmp/option/edge/person_knows_person/ordered_by_source/adj_list/"
@@ -432,13 +425,9 @@ TEST_CASE_METHOD(GlobalFixture, "TestEdgeChunkWriter") {
std::string parquet_file =
"/tmp/option/edge/person_knows_person/ordered_by_source/adj_list/part0/"
"chunk0";
- auto parquet_fs =
- arrow::fs::FileSystemFromUriOrPath(parquet_file).ValueOrDie();
- std::shared_ptr<arrow::io::RandomAccessFile> parquet_input =
- parquet_fs->OpenInputFile(parquet_file).ValueOrDie();
std::unique_ptr<parquet::arrow::FileReader> parquet_reader;
- auto st = parquet::arrow::OpenFile(
- parquet_input, arrow::default_memory_pool(), &parquet_reader);
+ auto st = graphar::util::OpenParquetArrowReader(
+ parquet_file, arrow::default_memory_pool(), &parquet_reader);
REQUIRE(st.ok());
std::shared_ptr<arrow::Table> parquet_table;
st = parquet_reader->ReadTable(&parquet_table);
diff --git a/cpp/test/test_builder.cc b/cpp/test/test_builder.cc
index 2fc9962d..ab1d7364 100644
--- a/cpp/test/test_builder.cc
+++ b/cpp/test/test_builder.cc
@@ -30,6 +30,7 @@
#include "arrow/io/api.h"
#include "arrow/stl.h"
#include "arrow/util/uri.h"
+#include "graphar/util.h"
#include "parquet/arrow/reader.h"
#include "parquet/arrow/writer.h"
@@ -131,13 +132,9 @@ TEST_CASE_METHOD(GlobalFixture, "Test_vertices_builder") {
REQUIRE((*ptr) == start_index + builder->GetNum());
// check parquet file compression
auto parquet_file = "/tmp/vertex/person/id/chunk0";
- auto parquet_fs =
- arrow::fs::FileSystemFromUriOrPath(parquet_file).ValueOrDie();
- std::shared_ptr<arrow::io::RandomAccessFile> parquet_input =
- parquet_fs->OpenInputFile(parquet_file).ValueOrDie();
std::unique_ptr<parquet::arrow::FileReader> parquet_reader;
- REQUIRE(parquet::arrow::OpenFile(parquet_input, arrow::default_memory_pool(),
- &parquet_reader)
+ REQUIRE(graphar::util::OpenParquetArrowReader(
+ parquet_file, arrow::default_memory_pool(), &parquet_reader)
.ok());
std::shared_ptr<arrow::Table> parquet_table;
REQUIRE(parquet_reader->ReadTable(&parquet_table).ok());
@@ -238,13 +235,9 @@ TEST_CASE_METHOD(GlobalFixture, "test_edges_builder") {
// check parquet file compression
auto parquet_file =
"/tmp/edge/person_knows_person/ordered_by_dest/creationDate/part0/chunk0";
- auto parquet_fs =
- arrow::fs::FileSystemFromUriOrPath(parquet_file).ValueOrDie();
- std::shared_ptr<arrow::io::RandomAccessFile> parquet_input =
- parquet_fs->OpenInputFile(parquet_file).ValueOrDie();
std::unique_ptr<parquet::arrow::FileReader> parquet_reader;
- REQUIRE(parquet::arrow::OpenFile(parquet_input, arrow::default_memory_pool(),
- &parquet_reader)
+ REQUIRE(graphar::util::OpenParquetArrowReader(
+ parquet_file, arrow::default_memory_pool(), &parquet_reader)
.ok());
std::shared_ptr<arrow::Table> parquet_table;
REQUIRE(parquet_reader->ReadTable(&parquet_table).ok());
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]