This is an automated email from the ASF dual-hosted git repository.
lixueclaire pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-graphar.git
The following commit(s) were added to refs/heads/main by this push:
new 883d7eb0 feat(c++): support read and write multi-property (#719)
883d7eb0 is described below
commit 883d7eb05b71e7b349c8f735c911bc9079bbe313
Author: Xiaokang Yang <[email protected]>
AuthorDate: Mon Jul 21 13:41:06 2025 +0800
feat(c++): support read and write multi-property (#719)
---
cpp/CMakeLists.txt | 1 +
cpp/src/graphar/arrow/chunk_reader.cc | 7 +-
cpp/src/graphar/arrow/chunk_writer.cc | 13 +--
cpp/src/graphar/fwd.h | 2 +
cpp/src/graphar/graph_info.cc | 43 +++++++-
cpp/src/graphar/graph_info.h | 10 +-
cpp/src/graphar/types.h | 39 +++++++
cpp/test/test_info.cc | 116 ++++++++++++++++----
cpp/test/test_multi_label.cc | 20 ++--
cpp/test/test_multi_property.cc | 161 ++++++++++++++++++++++++++++
docs/specification/implementation-status.md | 2 +
11 files changed, 368 insertions(+), 46 deletions(-)
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 9d9f8736..5fb6c142 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -566,6 +566,7 @@ if (BUILD_TESTS)
add_test(test_arrow_chunk_reader SRCS test/test_arrow_chunk_reader.cc)
add_test(test_graph SRCS test/test_graph.cc)
add_test(test_multi_label SRCS test/test_multi_label.cc)
+ add_test(test_multi_property SRCS test/test_multi_property.cc)
# enable_testing()
endif()
diff --git a/cpp/src/graphar/arrow/chunk_reader.cc
b/cpp/src/graphar/arrow/chunk_reader.cc
index a5c77cd2..1b5200fc 100644
--- a/cpp/src/graphar/arrow/chunk_reader.cc
+++ b/cpp/src/graphar/arrow/chunk_reader.cc
@@ -49,8 +49,11 @@ Result<std::shared_ptr<arrow::Schema>> PropertyGroupToSchema(
GeneralParams::kVertexIndexCol, arrow::int64()));
}
for (const auto& prop : pg->GetProperties()) {
- fields.push_back(std::make_shared<arrow::Field>(
- prop.name, DataType::DataTypeToArrowDataType(prop.type)));
+ auto dataType = DataType::DataTypeToArrowDataType(prop.type);
+ if (prop.cardinality != Cardinality::SINGLE) {
+ dataType = arrow::list(dataType);
+ }
+ fields.push_back(std::make_shared<arrow::Field>(prop.name, dataType));
}
return arrow::schema(fields);
}
diff --git a/cpp/src/graphar/arrow/chunk_writer.cc
b/cpp/src/graphar/arrow/chunk_writer.cc
index 234bbfb3..c8612383 100644
--- a/cpp/src/graphar/arrow/chunk_writer.cc
+++ b/cpp/src/graphar/arrow/chunk_writer.cc
@@ -321,17 +321,6 @@ Status VertexPropertyWriter::WriteTable(
return Status::OK();
}
-// Helper function to split a string by a delimiter
-std::vector<std::string> SplitString(const std::string& str, char delimiter) {
- std::vector<std::string> tokens;
- std::string token;
- std::istringstream tokenStream(str);
- while (std::getline(tokenStream, token, delimiter)) {
- tokens.push_back(token);
- }
- return tokens;
-}
-
Status VertexPropertyWriter::WriteLabelTable(
const std::shared_ptr<arrow::Table>& input_table, IdType start_chunk_index,
FileType file_type, ValidateLevel validate_level) const {
@@ -379,6 +368,8 @@ Result<std::shared_ptr<arrow::Table>>
VertexPropertyWriter::GetLabelTable(
auto label_column = std::static_pointer_cast<arrow::StringArray>(chunk);
// Populate the matrix based on :LABEL column values
+ // TODO(@yangxk): store array in the label_column, split the string when
+ // reading file
for (int64_t row = 0; row < label_column->length(); ++row) {
if (label_column->IsValid(row)) {
std::string labels_string = label_column->GetString(row);
diff --git a/cpp/src/graphar/fwd.h b/cpp/src/graphar/fwd.h
index f7b5d34a..e7feefd0 100644
--- a/cpp/src/graphar/fwd.h
+++ b/cpp/src/graphar/fwd.h
@@ -72,6 +72,8 @@ class FileSystem;
using IdType = int64_t;
enum class Type;
class DataType;
+/**
+ * Defines how multiple values are handled for a given property key.
+ * The enumerators keep their implicit values: SINGLE = 0, LIST = 1, SET = 2.
+ */
+enum Cardinality { SINGLE, LIST, SET };
/** Type of file format */
enum FileType { CSV = 0, PARQUET = 1, ORC = 2, JSON = 3 };
enum SelectType { PROPERTIES = 0, LABELS = 1 };
diff --git a/cpp/src/graphar/graph_info.cc b/cpp/src/graphar/graph_info.cc
index 81d1bba7..9959f194 100644
--- a/cpp/src/graphar/graph_info.cc
+++ b/cpp/src/graphar/graph_info.cc
@@ -20,6 +20,7 @@
#include <unordered_set>
#include <utility>
+#include "graphar/status.h"
#include "mini-yaml/yaml/Yaml.hpp"
#include "graphar/filesystem.h"
@@ -86,7 +87,8 @@ std::string BuildPath(const std::vector<std::string>& paths) {
bool operator==(const Property& lhs, const Property& rhs) {
return (lhs.name == rhs.name) && (lhs.type == rhs.type) &&
(lhs.is_primary == rhs.is_primary) &&
- (lhs.is_nullable == rhs.is_nullable);
+ (lhs.is_nullable == rhs.is_nullable) &&
+ (lhs.cardinality == rhs.cardinality);
}
PropertyGroup::PropertyGroup(const std::vector<Property>& properties,
@@ -138,6 +140,11 @@ bool PropertyGroup::IsValidated() const {
// list type is not supported in csv file
return false;
}
+ // TODO(@yangxk): support cardinality in csv file
+ if (p.cardinality != Cardinality::SINGLE && file_type_ == FileType::CSV) {
+ // list cardinality is not supported in csv file
+ return false;
+ }
}
return true;
}
@@ -212,6 +219,7 @@ class VertexInfo::Impl {
property_name_to_primary_.emplace(p.name, p.is_primary);
property_name_to_nullable_.emplace(p.name, p.is_nullable);
property_name_to_type_.emplace(p.name, p.type);
+ property_name_to_cardinality_.emplace(p.name, p.cardinality);
}
}
}
@@ -251,6 +259,7 @@ class VertexInfo::Impl {
std::unordered_map<std::string, bool> property_name_to_nullable_;
std::unordered_map<std::string, std::shared_ptr<DataType>>
property_name_to_type_;
+ std::unordered_map<std::string, Cardinality> property_name_to_cardinality_;
};
VertexInfo::VertexInfo(const std::string& type, IdType chunk_size,
@@ -363,6 +372,15 @@ Result<std::shared_ptr<DataType>>
VertexInfo::GetPropertyType(
return it->second;
}
+// Returns the cardinality registered for `property_name`, or Status::Invalid
+// when this vertex info has no property with that name.
+Result<Cardinality> VertexInfo::GetPropertyCardinality(
+    const std::string& property_name) const {
+  const auto& cardinalities = impl_->property_name_to_cardinality_;
+  const auto entry = cardinalities.find(property_name);
+  if (entry != cardinalities.end()) {
+    return entry->second;
+  }
+  return Status::Invalid("property name not found: ", property_name);
+}
+
Result<std::shared_ptr<VertexInfo>> VertexInfo::AddPropertyGroup(
std::shared_ptr<PropertyGroup> property_group) const {
if (property_group == nullptr) {
@@ -440,8 +458,13 @@ Result<std::shared_ptr<VertexInfo>> VertexInfo::Load(
bool is_primary = p_node["is_primary"].As<bool>();
bool is_nullable =
p_node["is_nullable"].IsNone() || p_node["is_nullable"].As<bool>();
+ Cardinality cardinality = Cardinality::SINGLE;
+ if (!p_node["cardinality"].IsNone()) {
+ cardinality =
+ StringToCardinality(p_node["cardinality"].As<std::string>());
+ }
property_vec.emplace_back(property_name, property_type, is_primary,
- is_nullable);
+ is_nullable, cardinality);
}
property_groups.push_back(
std::make_shared<PropertyGroup>(property_vec, file_type, pg_prefix));
@@ -485,6 +508,9 @@ Result<std::string> VertexInfo::Dump() const noexcept {
p_node["data_type"] = p.type->ToTypeName();
p_node["is_primary"] = p.is_primary ? "true" : "false";
p_node["is_nullable"] = p.is_nullable ? "true" : "false";
+ if (p.cardinality != Cardinality::SINGLE) {
+ p_node["cardinality"] = CardinalityToString(p.cardinality);
+ }
pg_node["properties"].PushBack();
pg_node["properties"][pg_node["properties"].Size() - 1] = p_node;
}
@@ -574,6 +600,13 @@ class EdgeInfo::Impl {
}
// check if property name is unique in all property groups
for (const auto& p : pg->GetProperties()) {
+ if (p.cardinality != Cardinality::SINGLE) {
+ // edge property only supports single cardinality
+ std::cout
+ << "Edge property only supports single cardinality, but got: "
+ << CardinalityToString(p.cardinality) << std::endl;
+ return false;
+ }
if (check_property_unique_set.find(p.name) !=
check_property_unique_set.end()) {
return false;
@@ -910,6 +943,12 @@ Result<std::shared_ptr<EdgeInfo>>
EdgeInfo::Load(std::shared_ptr<Yaml> yaml) {
auto property_name = p_node["name"].As<std::string>();
auto property_type =
DataType::TypeNameToDataType(p_node["data_type"].As<std::string>());
+        // Edge properties must be single-valued: both "list" and "set" are
+        // rejected here, so the error names the rule and the offending value.
+        if (!p_node["cardinality"].IsNone()) {
+          const auto cardinality_name =
+              p_node["cardinality"].As<std::string>();
+          if (StringToCardinality(cardinality_name) != Cardinality::SINGLE) {
+            return Status::YamlError(
+                "Edge property only supports single cardinality, but got: " +
+                cardinality_name);
+          }
+        }
bool is_primary = p_node["is_primary"].As<bool>();
bool is_nullable =
p_node["is_nullable"].IsNone() || p_node["is_nullable"].As<bool>();
diff --git a/cpp/src/graphar/graph_info.h b/cpp/src/graphar/graph_info.h
index c067810c..27a487d4 100644
--- a/cpp/src/graphar/graph_info.h
+++ b/cpp/src/graphar/graph_info.h
@@ -37,16 +37,20 @@ class Property {
std::shared_ptr<DataType> type; // property data type
bool is_primary; // primary key tag
bool is_nullable; // nullable tag for non-primary key
+  // Cardinality of the property; only used in vertex info. The default member
+  // initializer keeps a default-constructed Property from holding an
+  // indeterminate value that operator== would later read.
+  Cardinality cardinality = Cardinality::SINGLE;
Property() = default;
explicit Property(const std::string& name,
const std::shared_ptr<DataType>& type = nullptr,
- bool is_primary = false, bool is_nullable = true)
+ bool is_primary = false, bool is_nullable = true,
+ Cardinality cardinality = Cardinality::SINGLE)
: name(name),
type(type),
is_primary(is_primary),
- is_nullable(!is_primary && is_nullable) {}
+ is_nullable(!is_primary && is_nullable),
+ cardinality(cardinality) {}
};
bool operator==(const Property& lhs, const Property& rhs);
@@ -276,6 +280,8 @@ class VertexInfo {
Result<std::shared_ptr<DataType>> GetPropertyType(
const std::string& property_name) const;
+ Result<Cardinality> GetPropertyCardinality(
+ const std::string& property_name) const;
/**
* Get whether the vertex info contains the specified property.
*
diff --git a/cpp/src/graphar/types.h b/cpp/src/graphar/types.h
index cb468a20..ac318ca4 100644
--- a/cpp/src/graphar/types.h
+++ b/cpp/src/graphar/types.h
@@ -23,6 +23,7 @@
#include <memory>
#include <string>
#include <utility>
+#include <vector>
#include "graphar/fwd.h"
#include "graphar/macros.h"
@@ -242,4 +243,42 @@ static inline const char* FileTypeToString(FileType
file_type) {
return file_type2string.at(file_type);
}
+/**
+ * Parse the textual form of a cardinality.
+ *
+ * @param str one of "single", "list" or "set" (case-sensitive)
+ * @return the matching Cardinality value
+ * @throws std::runtime_error with message "KeyError: <str>" for any other
+ *         input
+ */
+static inline Cardinality StringToCardinality(const std::string& str) {
+  static const std::map<std::string, Cardinality> str2cardinality{
+      {"single", Cardinality::SINGLE},
+      {"list", Cardinality::LIST},
+      {"set", Cardinality::SET},
+  };
+  // Look up the std::string itself: going through c_str() would build a
+  // temporary key and silently truncate the input at an embedded '\0'.
+  const auto it = str2cardinality.find(str);
+  if (it == str2cardinality.end()) {
+    throw std::runtime_error("KeyError: " + str);
+  }
+  return it->second;
+}
+
+/**
+ * Convert a Cardinality value to its textual form.
+ *
+ * @return "single", "list" or "set"
+ * @throws std::runtime_error with message "KeyError: <int value>" when the
+ *         argument is not one of the three enumerators
+ */
+static inline const char* CardinalityToString(Cardinality cardinality) {
+  switch (cardinality) {
+  case Cardinality::SINGLE:
+    return "single";
+  case Cardinality::LIST:
+    return "list";
+  case Cardinality::SET:
+    return "set";
+  }
+  // Only reachable for a value outside the enumerator set.
+  throw std::runtime_error("KeyError: " +
+                           std::to_string(static_cast<int>(cardinality)));
+}
+
+/**
+ * Split `str` into the pieces separated by `delimiter`.
+ *
+ * Empty pieces between two delimiters (and before a leading delimiter) are
+ * kept; a trailing delimiter does not produce a final empty piece, and an
+ * empty input yields an empty vector.
+ */
+inline std::vector<std::string> SplitString(const std::string& str,
+                                            char delimiter) {
+  std::vector<std::string> pieces;
+  std::string::size_type begin = 0;
+  while (begin < str.size()) {
+    const auto end = str.find(delimiter, begin);
+    if (end == std::string::npos) {
+      // last piece: everything up to the end of the input
+      pieces.push_back(str.substr(begin));
+      break;
+    }
+    pieces.push_back(str.substr(begin, end - begin));
+    begin = end + 1;
+  }
+  return pieces;
+}
} // namespace graphar
diff --git a/cpp/test/test_info.cc b/cpp/test/test_info.cc
index 82d839e3..9901ea47 100644
--- a/cpp/test/test_info.cc
+++ b/cpp/test/test_info.cc
@@ -25,6 +25,8 @@
#include "./util.h"
#include "graphar/api/info.h"
+#include "graphar/fwd.h"
+#include "graphar/status.h"
#include <catch2/catch_test_macros.hpp>
@@ -75,9 +77,9 @@ TEST_CASE_METHOD(GlobalFixture, "Property") {
TEST_CASE_METHOD(GlobalFixture, "PropertyGroup") {
Property p0("p0", int32(), true);
- Property p1("p1", int32(), false);
- Property p2("p2", string(), false);
- Property p3("p3", float32(), false);
+ Property p1("p1", int32(), false, true, Cardinality::SINGLE);
+ Property p2("p2", string(), false, true, Cardinality::LIST);
+ Property p3("p3", float32(), false, true, Cardinality::SET);
Property p4("p4", float64(), false);
PropertyGroup pg0({p0, p1}, FileType::CSV, "p0_and_p1/");
@@ -94,6 +96,11 @@ TEST_CASE_METHOD(GlobalFixture, "PropertyGroup") {
REQUIRE(p.type->ToTypeName() == int32()->ToTypeName());
REQUIRE(p.is_primary == true);
REQUIRE(p.is_nullable == false);
+ // cardinality
+ REQUIRE(p0.cardinality == Cardinality::SINGLE);
+ REQUIRE(p1.cardinality == Cardinality::SINGLE);
+ REQUIRE(p2.cardinality == Cardinality::LIST);
+ REQUIRE(p3.cardinality == Cardinality::SET);
}
SECTION("FileType") {
@@ -180,8 +187,10 @@ TEST_CASE_METHOD(GlobalFixture, "VertexInfo") {
int chunk_size = 100;
auto version = std::make_shared<InfoVersion>(1);
auto pg = CreatePropertyGroup(
- {Property("p0", int32(), true), Property("p1", string(), false)},
- FileType::CSV, "p0_p1/");
+ {Property("p0", int32(), true),
+ Property("p1", string(), false, true, Cardinality::LIST),
+ Property("p2", string(), false, true, Cardinality::SET)},
+ FileType::PARQUET, "p0_p1/");
auto vertex_info =
CreateVertexInfo(type, chunk_size, {pg}, {}, "test_vertex", version);
@@ -206,6 +215,12 @@ TEST_CASE_METHOD(GlobalFixture, "VertexInfo") {
REQUIRE(vertex_info->IsPrimaryKey("p1") == false);
REQUIRE(vertex_info->IsNullableKey("p0") == false);
REQUIRE(vertex_info->IsNullableKey("p1") == true);
+ REQUIRE(vertex_info->GetPropertyCardinality("p0").value() ==
+ Cardinality::SINGLE);
+ REQUIRE(vertex_info->GetPropertyCardinality("p1").value() ==
+ Cardinality::LIST);
+ REQUIRE(vertex_info->GetPropertyCardinality("p2").value() ==
+ Cardinality::SET);
REQUIRE(vertex_info->HasProperty("not_exist") == false);
REQUIRE(vertex_info->IsPrimaryKey("not_exist") == false);
REQUIRE(vertex_info->HasPropertyGroup(nullptr) == false);
@@ -221,11 +236,17 @@ TEST_CASE_METHOD(GlobalFixture, "VertexInfo") {
SECTION("IsValidate") {
REQUIRE(vertex_info->IsValidated() == true);
- auto invalid_pg =
- CreatePropertyGroup({Property("p0", nullptr, true)}, FileType::CSV);
+ auto invalid_pg = CreatePropertyGroup(
+ {Property("p0", list(string()), true, false, Cardinality::SET)},
+ FileType::CSV);
auto invalid_vertex_info0 = CreateVertexInfo(type, chunk_size,
{invalid_pg},
{}, "test_vertex/", version);
REQUIRE(invalid_vertex_info0->IsValidated() == false);
+ invalid_pg =
+ CreatePropertyGroup({Property("p0", nullptr, true)}, FileType::CSV);
+ invalid_vertex_info0 = CreateVertexInfo(type, chunk_size, {invalid_pg}, {},
+ "test_vertex/", version);
+ REQUIRE(invalid_vertex_info0->IsValidated() == false);
VertexInfo invalid_vertex_info1("", chunk_size, {pg}, {}, "test_vertex/",
version);
REQUIRE(invalid_vertex_info1.IsValidated() == false);
@@ -252,17 +273,23 @@ TEST_CASE_METHOD(GlobalFixture, "VertexInfo") {
std::string expected = R"(chunk_size: 100
prefix: test_vertex
property_groups:
- - file_type: csv
+ - file_type: parquet
prefix: p0_p1/
properties:
- data_type: int32
is_nullable: false
is_primary: true
name: p0
- - data_type: string
+ - cardinality: list
+ data_type: string
is_nullable: true
is_primary: false
name: p1
+ - cardinality: set
+ data_type: string
+ is_nullable: true
+ is_primary: false
+ name: p2
type: test_vertex
version: gar/v1
)";
@@ -280,22 +307,23 @@ version: gar/v1
}
SECTION("AddPropertyGroup") {
- auto pg2 = CreatePropertyGroup({Property("p2", int32(), false)},
- FileType::CSV, "p2/");
- auto maybe_extend_info = vertex_info->AddPropertyGroup(pg2);
+    // "p3" is used because the fixture's group already holds p0, p1 and p2.
+    auto pg3 = CreatePropertyGroup({Property("p3", int32(), false)},
+                                   FileType::CSV, "p3/");
+    auto maybe_extend_info = vertex_info->AddPropertyGroup(pg3);
REQUIRE(maybe_extend_info.status().ok());
auto extend_info = maybe_extend_info.value();
REQUIRE(extend_info->PropertyGroupNum() == 2);
- REQUIRE(extend_info->HasProperty("p2") == true);
- REQUIRE(extend_info->HasPropertyGroup(pg2) == true);
+    // Check the property that was just added; "p2" existed before the call.
+    REQUIRE(extend_info->HasProperty("p3") == true);
+    REQUIRE(extend_info->HasPropertyGroup(pg3) == true);
REQUIRE(extend_info->GetPropertyGroups().size() == 2);
- REQUIRE(*(extend_info->GetPropertyGroups()[1]) == *pg2);
- REQUIRE(extend_info->GetPropertyType("p2").value()->ToTypeName() ==
+    REQUIRE(*(extend_info->GetPropertyGroups()[1]) == *pg3);
+    REQUIRE(extend_info->GetPropertyType("p3").value()->ToTypeName() ==
int32()->ToTypeName());
- REQUIRE(extend_info->IsPrimaryKey("p2") == false);
- REQUIRE(extend_info->IsNullableKey("p2") == true);
- auto extend_info2 = extend_info->AddPropertyGroup(pg2);
- REQUIRE(!extend_info2.status().ok());
+    REQUIRE(extend_info->IsPrimaryKey("p3") == false);
+    REQUIRE(extend_info->IsNullableKey("p3") == true);
+    REQUIRE(extend_info->GetPropertyCardinality("p3").value() ==
+            Cardinality::SINGLE);
+    // Adding the same group a second time must be rejected.
+    auto extend_info3 = extend_info->AddPropertyGroup(pg3);
+    REQUIRE(!extend_info3.status().ok());
}
}
@@ -388,6 +416,15 @@ TEST_CASE_METHOD(GlobalFixture, "EdgeInfo") {
src_chunk_size, dst_chunk_size, directed, {adj_list},
{invalid_pg}, "test_edge/", version);
REQUIRE(invalid_edge_info0->IsValidated() == false);
+    // edgeInfo does not support list/set cardinality
+    invalid_pg = CreatePropertyGroup(
+        {Property("p_cardinality", string(), false, true, Cardinality::LIST)},
+        FileType::PARQUET);
+    auto cardinality_invalid_edge_info0 =
+        CreateEdgeInfo(src_type, edge_type, dst_type, chunk_size,
+                       src_chunk_size, dst_chunk_size, directed, {adj_list},
+                       {invalid_pg}, "test_edge/", version);
+    // Assert on the edge info built just above, not on the earlier
+    // invalid_edge_info0 that was already checked.
+    REQUIRE(cardinality_invalid_edge_info0->IsValidated() == false);
for (int i = 0; i < 3; i++) {
std::vector<std::string> types = {src_type, edge_type, dst_type};
types[i] = "";
@@ -691,6 +728,13 @@ property_groups:
is_primary: true
is_nullable: false
file_type: parquet
+ - properties:
+ - name: email
+ data_type: string
+ is_primary: false
+ is_nullable: true
+ cardinality: list
+ file_type: parquet
- properties:
- name: firstName
data_type: string
@@ -750,6 +794,10 @@ extra_info:
REQUIRE(vertex_info->GetChunkSize() == 100);
REQUIRE(vertex_info->GetPrefix() == "vertex/person/");
REQUIRE(vertex_info->version()->ToString() == "gar/v1");
+ REQUIRE(vertex_info->GetPropertyCardinality("id").value() ==
+ Cardinality::SINGLE);
+ REQUIRE(vertex_info->GetPropertyCardinality("email").value() ==
+ Cardinality::LIST);
}
SECTION("EdgeInfo::Load") {
@@ -761,6 +809,36 @@ extra_info:
REQUIRE(edge_info->GetDstType() == "person");
}
+ edge_info_yaml = R"(src_type: person
+edge_type: knows
+dst_type: person
+chunk_size: 1024
+src_chunk_size: 100
+dst_chunk_size: 100
+directed: false
+prefix: edge/person_knows_person/
+adj_lists:
+ - ordered: false
+ aligned_by: src
+ file_type: parquet
+property_groups:
+ - file_type: parquet
+ properties:
+ - name: creationDate
+ data_type: string
+ is_primary: false
+ is_nullable: true
+ cardinality: list
+version: gar/v1
+)";
+
+ SECTION("EdgeInfo::Load with cardinality") {
+ auto maybe_edge_info = EdgeInfo::Load(edge_info_yaml);
+ REQUIRE(maybe_edge_info.has_error());
+ REQUIRE(maybe_edge_info.status().code() == StatusCode::kYamlError);
+ std::cout << maybe_edge_info.status().message() << std::endl;
+ }
+
SECTION("GraphInfo::Load") {
auto maybe_graph_info = GraphInfo::Load(graph_info_yaml, "/");
std::cout << maybe_graph_info.status().message() << std::endl;
diff --git a/cpp/test/test_multi_label.cc b/cpp/test/test_multi_label.cc
index a77b9ff0..1795b211 100644
--- a/cpp/test/test_multi_label.cc
+++ b/cpp/test/test_multi_label.cc
@@ -17,19 +17,19 @@
* under the License.
*/
-#include <fstream>
+#include <arrow/compute/api.h>
+#include <cstddef>
#include <iostream>
-#include <unordered_map>
+#include <memory>
+#include <ostream>
+#include <string>
#include "arrow/api.h"
-#include "arrow/csv/api.h"
-#include "arrow/filesystem/api.h"
-#include "arrow/io/api.h"
-#include "graphar/fwd.h"
+#include "graphar/arrow/chunk_reader.h"
+#include "graphar/arrow/chunk_writer.h"
+#include "graphar/graph_info.h"
#include "parquet/arrow/writer.h"
#include "./util.h"
-#include "graphar/api/arrow_reader.h"
-#include "graphar/api/high_level_writer.h"
#include <catch2/catch_test_macros.hpp>
@@ -79,7 +79,7 @@ TEST_CASE_METHOD(GlobalFixture, "test_multi_label_builder") {
// write arrow table as parquet chunk
auto maybe_writer =
- VertexPropertyWriter::Make(vertex_info, test_data_dir +
"/ldbc/parquet/");
+ VertexPropertyWriter::Make(vertex_info, "/tmp/ldbc/parquet/");
REQUIRE(!maybe_writer.has_error());
auto writer = maybe_writer.value();
REQUIRE(writer->WriteTable(table, 0).ok());
@@ -87,7 +87,7 @@ TEST_CASE_METHOD(GlobalFixture, "test_multi_label_builder") {
// read label chunk as arrow table
auto maybe_reader = VertexPropertyArrowChunkReader::Make(
- graph_info, "organisation", labels, SelectType::LABELS);
+ vertex_info, labels, "/tmp/ldbc/parquet/");
assert(maybe_reader.status().ok());
auto reader = maybe_reader.value();
assert(reader->seek(0).ok());
diff --git a/cpp/test/test_multi_property.cc b/cpp/test/test_multi_property.cc
new file mode 100644
index 00000000..f82fc608
--- /dev/null
+++ b/cpp/test/test_multi_property.cc
@@ -0,0 +1,161 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <arrow/compute/api.h>
+#include <cstddef>
+#include <iostream>
+#include <memory>
+#include <ostream>
+#include <string>
+#include "arrow/api.h"
+#include "examples/config.h"
+#include "graphar/arrow/chunk_reader.h"
+#include "graphar/arrow/chunk_writer.h"
+#include "graphar/graph_info.h"
+#include "graphar/types.h"
+#include "parquet/arrow/writer.h"
+
+#include "./util.h"
+
+#include <catch2/catch_test_macros.hpp>
+
+/**
+ * Read a '|'-delimited CSV file into an Arrow table, using value-initialized
+ * read and convert options. Each Arrow result is unwrapped with ValueOrDie(),
+ * so no error is returned to the caller.
+ */
+std::shared_ptr<arrow::Table> read_csv_to_table(const std::string& filename) {
+  arrow::csv::ParseOptions parse_options{};
+  parse_options.delimiter = '|';
+
+  auto csv_file =
+      arrow::io::ReadableFile::Open(filename, arrow::default_memory_pool())
+          .ValueOrDie();
+  auto table_reader =
+      arrow::csv::TableReader::Make(arrow::io::default_io_context(), csv_file,
+                                    arrow::csv::ReadOptions{}, parse_options,
+                                    arrow::csv::ConvertOptions{})
+          .ValueOrDie();
+  return table_reader->Read().ValueOrDie();
+}
+
+namespace graphar {
+// Round-trips a list-valued "emails" property: the ';'-separated CSV column is
+// turned into a list<string> column, written through VertexPropertyWriter,
+// read back through VertexPropertyArrowChunkReader, and one randomly chosen
+// row is compared against the original CSV text.
+TEST_CASE_METHOD(GlobalFixture, "read from csv file") {
+  // read person csv file as arrow table
+  auto person_table =
+      read_csv_to_table(test_data_dir + "/ldbc/person_0_0.csv");
+  auto seed = static_cast<unsigned int>(time(NULL));
+  int expected_row = rand_r(&seed) % person_table->num_rows();
+  auto person_schema = person_table->schema();
+  arrow::MemoryPool* pool = arrow::default_memory_pool();
+  auto value_builder = std::make_shared<arrow::StringBuilder>();
+  arrow::ListBuilder builder(pool, value_builder);
+  auto email_col_idx = person_table->schema()->GetFieldIndex("emails");
+  std::string expected_emails =
+      std::static_pointer_cast<arrow::StringArray>(
+          person_table->column(email_col_idx)->chunk(0))
+          ->GetString(expected_row);
+  // build one list entry per row; a null row becomes an empty list
+  for (int64_t chunk_idx = 0;
+       chunk_idx < person_table->column(email_col_idx)->num_chunks();
+       ++chunk_idx) {
+    auto chunk = person_table->column(email_col_idx)->chunk(chunk_idx);
+    auto email_column = std::static_pointer_cast<arrow::StringArray>(chunk);
+    for (int64_t row = 0; row < email_column->length(); ++row) {
+      auto result = builder.Append();
+      ASSERT(result.ok());
+      if (email_column->IsValid(row)) {
+        std::string emails_string = email_column->GetString(row);
+        auto row_emails = SplitString(emails_string, ';');
+        for (const auto& email : row_emails) {
+          ASSERT(value_builder->Append(email).ok());
+        }
+      }
+    }
+  }
+  std::shared_ptr<arrow::Array> array;
+  // do not ignore the builder status: a failed Finish leaves `array` null
+  REQUIRE(builder.Finish(&array).ok());
+  auto person_emails_chunked_array = std::make_shared<arrow::ChunkedArray>(
+      std::vector<std::shared_ptr<arrow::Array>>{array});
+  // swap the string "emails" column for the list<string> one
+  int emailFieldIndex = person_schema->GetFieldIndex("emails");
+  person_table = person_table->RemoveColumn(emailFieldIndex).ValueOrDie();
+  person_schema = person_schema->RemoveField(emailFieldIndex).ValueOrDie();
+  person_schema =
+      person_schema
+          ->AddField(person_schema->num_fields(),
+                     arrow::field("emails", arrow::list(arrow::utf8())))
+          .ValueOrDie();
+  person_table = person_table
+                     ->AddColumn(person_table->num_columns(),
+                                 person_schema->fields().back(),
+                                 person_emails_chunked_array)
+                     .ValueOrDie();
+  // check the in-memory list column against the original CSV text
+  auto index = person_schema->GetFieldIndex("emails");
+  auto emails_col = person_table->column(index)->chunk(0);
+  auto result = std::static_pointer_cast<arrow::ListArray>(
+      emails_col->View(arrow::list(arrow::utf8())).ValueOrDie());
+  auto values = std::static_pointer_cast<arrow::StringArray>(result->values());
+  int64_t start = result->value_offset(expected_row);
+  int64_t end = result->value_offset(expected_row + 1);
+  std::string emails = "";
+  for (int64_t i = start; i < end; ++i) {
+    emails += values->GetString(i);
+    if (i < end - 1)
+      emails += ";";
+  }
+  std::cout << "random row: " << expected_row << std::endl;
+  ASSERT(expected_emails == emails);
+  // write to parquet file
+  std::string path = test_data_dir + "/ldbc/parquet/" + "ldbc.graph.yml";
+  auto graph_info = graphar::GraphInfo::Load(path).value();
+  auto vertex_info = graph_info->GetVertexInfo("person");
+  auto maybe_writer =
+      VertexPropertyWriter::Make(vertex_info, "/tmp/ldbc/parquet/");
+  REQUIRE(!maybe_writer.has_error());
+  auto writer = maybe_writer.value();
+  REQUIRE(writer->WriteTable(person_table, 0).ok());
+  REQUIRE(writer->WriteVerticesNum(person_table->num_rows()).ok());
+
+  // read the property group back; REQUIRE (not assert) so that the seek is
+  // still executed when the test is built with NDEBUG
+  auto maybe_reader = VertexPropertyArrowChunkReader::Make(
+      vertex_info, vertex_info->GetPropertyGroup("emails"),
+      "/tmp/ldbc/parquet/");
+  REQUIRE(maybe_reader.status().ok());
+  auto reader = maybe_reader.value();
+  REQUIRE(reader->seek(expected_row).ok());
+  auto table_result = reader->GetChunk();
+  ASSERT(table_result.status().ok());
+  auto table = table_result.value();
+  index = table->schema()->GetFieldIndex("emails");
+  emails_col = table->column(index)->chunk(0);
+  result = std::static_pointer_cast<arrow::ListArray>(
+      emails_col->View(arrow::list(arrow::large_utf8())).ValueOrDie());
+  // NOTE(review): expected_row is not read after this assignment; confirm
+  // that value_slice(0) below is the intended row of the returned chunk.
+  expected_row = expected_row % vertex_info->GetChunkSize();
+  auto email_result = std::static_pointer_cast<arrow::LargeStringArray>(
+      result->value_slice(0));
+  emails = "";
+  end = email_result->length();
+  for (int64_t i = 0; i < end; ++i) {
+    emails += email_result->GetString(i);
+    if (i < end - 1)
+      emails += ";";
+  }
+  std::cout << emails << std::endl;
+  ASSERT(expected_emails == emails);
+}
+} // namespace graphar
diff --git a/docs/specification/implementation-status.md
b/docs/specification/implementation-status.md
index 6e634714..6d205ae7 100644
--- a/docs/specification/implementation-status.md
+++ b/docs/specification/implementation-status.md
@@ -117,6 +117,8 @@ Vertex features:
| tag | | | | |
| chunk based | ✓ | ✓ | ✓ | ✓ |
| property group | ✓ | ✓ | ✓ | ✓ |
+| multi-label | ✓ | | ✓ | |
+| multi-property | ✓ | | | |
:::note
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]