This is an automated email from the ASF dual-hosted git repository.

lixueclaire pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-graphar.git


The following commit(s) were added to refs/heads/main by this push:
     new aca9f5de feat(C++): filter property and return VerticesCollection (#658)
aca9f5de is described below

commit aca9f5de62b3db99eab1e094ce7797c825a06c08
Author: Elssky <43638383+els...@users.noreply.github.com>
AuthorDate: Tue Nov 19 10:33:57 2024 +0800

    feat(C++): filter property and return VerticesCollection (#658)
---
 ...l_filtering_example.cc => filtering_example.cc} |  57 ++++++++++-
 cpp/src/graphar/high-level/graph_reader.cc         | 108 ++++++++++++++++++++-
 cpp/src/graphar/high-level/graph_reader.h          |  12 +++
 3 files changed, 174 insertions(+), 3 deletions(-)

diff --git a/cpp/examples/label_filtering_example.cc b/cpp/examples/filtering_example.cc
similarity index 62%
rename from cpp/examples/label_filtering_example.cc
rename to cpp/examples/filtering_example.cc
index e519bdda..643028e7 100644
--- a/cpp/examples/label_filtering_example.cc
+++ b/cpp/examples/filtering_example.cc
@@ -80,8 +80,63 @@ void vertices_collection(
     std::cout << property << " ";
     std::cout << std::endl;
   }
-}
+  std::cout << std::endl;
+
+  std::cout << "Test vertices with property in a filtered vertices set"
+            << std::endl;
+  std::cout << "--------------------------------------" << std::endl;
+  auto filter = graphar::_Equal(graphar::_Property("name"),
+                                graphar::_Literal("Safi_Airways"));
+  auto maybe_filter_vertices_collection_4 =
+      graphar::VerticesCollection::verticesWithProperty(
+          std::string("name"), filter, graph_info, type);
+  ASSERT(!maybe_filter_vertices_collection_4.has_error());
+  auto filter_vertices_4 = maybe_filter_vertices_collection_4.value();
+  std::cout << "valid vertices num: " << filter_vertices_4->size() << std::endl;
 
+  for (auto it = filter_vertices_4->begin(); it != filter_vertices_4->end();
+       ++it) {
+    // get a node's all labels
+    auto label_result = it.label();
+    std::cout << "id: " << it.id() << " ";
+    if (!label_result.has_error()) {
+      for (auto label : label_result.value()) {
+        std::cout << label << " ";
+      }
+    }
+    std::cout << "name: ";
+    auto property = it.property<std::string>("name").value();
+    std::cout << property << " ";
+    std::cout << std::endl;
+  }
+
+  std::cout << "Test vertices with property" << std::endl;
+  std::cout << "--------------------------------------" << std::endl;
+  auto filter_2 =
+      graphar::_Equal(graphar::_Property("name"), graphar::_Literal("Kam_Air"));
+  auto maybe_filter_vertices_collection_5 =
+      graphar::VerticesCollection::verticesWithProperty(
+          std::string("name"), filter_2, filter_vertices_3);
+  ASSERT(!maybe_filter_vertices_collection_5.has_error());
+  auto filter_vertices_5 = maybe_filter_vertices_collection_5.value();
+  std::cout << "valid vertices num: " << filter_vertices_5->size() << std::endl;
+
+  for (auto it = filter_vertices_5->begin(); it != filter_vertices_5->end();
+       ++it) {
+    // get a node's all labels
+    auto label_result = it.label();
+    std::cout << "id: " << it.id() << " ";
+    if (!label_result.has_error()) {
+      for (auto label : label_result.value()) {
+        std::cout << label << " ";
+      }
+    }
+    std::cout << "name: ";
+    auto property = it.property<std::string>("name").value();
+    std::cout << property << " ";
+    std::cout << std::endl;
+  }
+}
 int main(int argc, char* argv[]) {
   // read file and construct graph info
   std::string path = GetTestingResourceRoot() + "/ldbc/parquet/ldbc.graph.yml";
diff --git a/cpp/src/graphar/high-level/graph_reader.cc b/cpp/src/graphar/high-level/graph_reader.cc
index 2cfe5b36..66438af2 100644
--- a/cpp/src/graphar/high-level/graph_reader.cc
+++ b/cpp/src/graphar/high-level/graph_reader.cc
@@ -17,13 +17,12 @@
  * under the License.
  */
 
+#include "graphar/high-level/graph_reader.h"
 #include <algorithm>
 #include <unordered_set>
-
 #include "arrow/array.h"
 #include "graphar/api/arrow_reader.h"
 #include "graphar/convert_to_arrow_type.h"
-#include "graphar/high-level/graph_reader.h"
 #include "graphar/label.h"
 #include "graphar/types.h"
 
@@ -264,6 +263,69 @@ Result<std::vector<IdType>> VerticesCollection::filter_by_acero(
   return indices64;
 }
 
+/**
+ * Apply a property filter chunk by chunk and collect the vertex index values
+ * of every row that passes it.
+ *
+ * When this collection is already filtered, only the chunks listed in
+ * valid_chunk_ are read, and the index of each chunk that still has matching
+ * rows is appended to new_valid_chunk (if given). Otherwise every chunk is
+ * read and the index of each matching chunk is appended to this collection's
+ * own valid_chunk_.
+ *
+ * @param property_name Name of a property; selects the property group to read.
+ * @param filter_expression Filter installed on the chunk reader.
+ * @param new_valid_chunk Optional output for matching chunk indices; only
+ * written when this collection is already filtered.
+ * @return The non-null vertex index values of all matching rows.
+ */
+Result<std::vector<IdType>> VerticesCollection::filter(
+    std::string property_name, std::shared_ptr<Expression> filter_expression,
+    std::vector<IdType>* new_valid_chunk) {
+  // Keep row counts and chunk offsets in IdType so that
+  // `chunk_idx * chunk_size` cannot overflow a 32-bit int on large graphs.
+  const IdType total_rows = vertex_num_;
+  const IdType chunk_size = vertex_info_->GetChunkSize();
+
+  auto property_group = vertex_info_->GetPropertyGroup(property_name);
+  auto maybe_filter_reader = graphar::VertexPropertyArrowChunkReader::Make(
+      vertex_info_, property_group, prefix_, {});
+  // NOTE(review): value() is taken here and on every GetChunk() result below
+  // without a has_error() check; a failed read is not reported to the caller.
+  auto filter_reader = maybe_filter_reader.value();
+  filter_reader->Filter(filter_expression);
+
+  std::vector<IdType> matched_ids;
+
+  // Appends every non-null value of the vertex index column of `table` to
+  // matched_ids. All chunks of the column are visited, not only the first.
+  auto collect_ids = [&matched_ids](const auto& table) {
+    const int index_col =
+        table->schema()->GetFieldIndex(GeneralParams::kVertexIndexCol);
+    for (const auto& piece : table->column(index_col)->chunks()) {
+      auto id_array = std::static_pointer_cast<arrow::Int64Array>(piece);
+      for (int64_t i = 0; i < id_array->length(); ++i) {
+        if (!id_array->IsNull(i)) {
+          matched_ids.push_back(id_array->Value(i));
+        }
+      }
+    }
+  };
+
+  if (is_filtered_) {
+    // Only revisit the chunks that survived the previous filter.
+    for (const IdType chunk_idx : valid_chunk_) {
+      filter_reader->seek(chunk_idx * chunk_size);
+      auto filter_table = filter_reader->GetChunk().value();
+      if (filter_table->num_rows() == 0) {
+        continue;
+      }
+      if (new_valid_chunk != nullptr) {
+        new_valid_chunk->push_back(chunk_idx);
+      }
+      // Ids are collected even when the caller did not ask for the chunk
+      // list, so the returned vector is never silently empty.
+      collect_ids(filter_table);
+    }
+  } else {
+    // NOTE(review): this branch records matching chunks in this collection's
+    // own valid_chunk_ and ignores new_valid_chunk - confirm that is intended.
+    for (IdType chunk_idx = 0; chunk_idx * chunk_size < total_rows;
+         ++chunk_idx) {
+      auto filter_table = filter_reader->GetChunk().value();
+      filter_reader->next_chunk();
+      if (filter_table->num_rows() == 0) {
+        continue;
+      }
+      valid_chunk_.push_back(chunk_idx);
+      collect_ids(filter_table);
+    }
+  }
+  return matched_ids;
+}
+
 Result<std::shared_ptr<VerticesCollection>>
 VerticesCollection::verticesWithLabel(
     const std::string& filter_label,
@@ -384,6 +446,48 @@ VerticesCollection::verticesWithMultipleLabels(
   return new_vertices_collection;
 }
 
+/**
+ * Build a collection of the vertices of `type` whose property satisfies
+ * `filter`. The collection is created from the graph's vertex info and
+ * prefix, filtered in place, and marked as filtered before it is returned.
+ */
+Result<std::shared_ptr<VerticesCollection>>
+VerticesCollection::verticesWithProperty(
+    const std::string property_name, const graphar::util::Filter filter,
+    const std::shared_ptr<GraphInfo>& graph_info, const std::string& type) {
+  auto graph_prefix = graph_info->GetPrefix();
+  auto info_of_type = graph_info->GetVertexInfo(type);
+  auto result =
+      std::make_shared<VerticesCollection>(info_of_type, graph_prefix);
+  // filter() also records the matching chunks in result->valid_chunk_.
+  auto matched_ids = result->filter(property_name, filter);
+  result->filtered_ids_ = matched_ids.value();
+  result->is_filtered_ = true;
+  return result;
+}
+
+/**
+ * Build a new collection holding the vertices of `vertices_collection` whose
+ * property satisfies `filter`.
+ *
+ * The returned collection is always marked as filtered, matching the
+ * graph_info overload. If the given collection is already filtered, the
+ * result is the intersection of its ids with the new matches, kept in the
+ * order produced by filter().
+ *
+ * @param property_name Name of a property; selects the property group to read.
+ * @param filter The filter expression to apply.
+ * @param vertices_collection The collection to narrow down; it is not
+ * modified.
+ * @return The new, filtered collection.
+ */
+Result<std::shared_ptr<VerticesCollection>>
+VerticesCollection::verticesWithProperty(
+    const std::string property_name, const graphar::util::Filter filter,
+    const std::shared_ptr<VerticesCollection>& vertices_collection) {
+  auto new_vertices_collection = std::make_shared<VerticesCollection>(
+      vertices_collection->vertex_info_, vertices_collection->prefix_);
+  if (!vertices_collection->is_filtered_) {
+    // Unfiltered source: scan through the new collection, as the graph_info
+    // overload does, so that the matching chunks are recorded on the result
+    // and the caller's collection is left untouched.
+    new_vertices_collection->filtered_ids_ =
+        new_vertices_collection->filter(property_name, filter).value();
+  } else {
+    auto candidate_ids = vertices_collection
+                             ->filter(property_name, filter,
+                                      &new_vertices_collection->valid_chunk_)
+                             .value();
+    std::unordered_set<IdType> origin_set(
+        vertices_collection->filtered_ids_.begin(),
+        vertices_collection->filtered_ids_.end());
+    // Keep only ids that were already selected. Iterating the candidates
+    // (rather than a hash set) keeps the full IdType width and a
+    // deterministic order.
+    auto& kept_ids = new_vertices_collection->filtered_ids_;
+    kept_ids.clear();
+    kept_ids.reserve(candidate_ids.size());
+    for (const IdType id : candidate_ids) {
+      if (origin_set.count(id) != 0) {
+        kept_ids.push_back(id);
+      }
+    }
+  }
+  new_vertices_collection->is_filtered_ = true;
+  return new_vertices_collection;
+}
+
 template <typename T>
 Result<T> Vertex::property(const std::string& property) const {
   if constexpr (std::is_final<T>::value) {
diff --git a/cpp/src/graphar/high-level/graph_reader.h b/cpp/src/graphar/high-level/graph_reader.h
index 19c8f716..31a64ff0 100644
--- a/cpp/src/graphar/high-level/graph_reader.h
+++ b/cpp/src/graphar/high-level/graph_reader.h
@@ -382,6 +382,10 @@ class VerticesCollection {
   Result<std::vector<IdType>> filter_by_acero(
       std::vector<std::string> filter_labels) const;
 
+  /**
+   * @brief Filter the vertices of this collection by a property expression
+   *
+   * @param property_name Name of a property; used to look up the property
+   * group that is read while filtering.
+   * @param filter_expression The filter expression applied to each chunk.
+   * @param new_valid_chunk Optional output. When this collection is already
+   * filtered, the index of every valid chunk that still has matching rows
+   * is appended to it; pass a non-null pointer in that case.
+   * @return The vertex index values of the rows that passed the filter.
+   */
+  Result<std::vector<IdType>> filter(
+      std::string property_name, std::shared_ptr<Expression> filter_expression,
+      std::vector<IdType>* new_valid_chunk = nullptr);
+
   /**
    * @brief Query vertices with a specific label
    *
@@ -431,6 +435,14 @@ class VerticesCollection {
       const std::vector<std::string>& filter_labels,
       const std::shared_ptr<GraphInfo>& graph_info, const std::string& type);
 
+  /**
+   * @brief Query vertices of a given type whose property satisfies a filter
+   *
+   * @param property_name Name of a property; used to look up the property
+   * group that is read while filtering.
+   * @param filter The filter expression to apply.
+   * @param graph_info The graph info that supplies the prefix and the vertex
+   * info of the given type.
+   * @param type The vertex type.
+   * @return A new VerticesCollection marked as filtered.
+   */
+  static Result<std::shared_ptr<VerticesCollection>> verticesWithProperty(
+      const std::string property_name, const graphar::util::Filter filter,
+      const std::shared_ptr<GraphInfo>& graph_info, const std::string& type);
+
+  /**
+   * @brief Query vertices within a given collection whose property satisfies
+   * a filter
+   *
+   * @param property_name Name of a property; used to look up the property
+   * group that is read while filtering.
+   * @param filter The filter expression to apply.
+   * @param vertices_collection The collection to filter. When it is already
+   * filtered, the new matches are intersected with its filtered ids.
+   * @return A new VerticesCollection holding the ids that passed the filter.
+   */
+  static Result<std::shared_ptr<VerticesCollection>> verticesWithProperty(
+      const std::string property_name, const graphar::util::Filter filter,
+      const std::shared_ptr<VerticesCollection>& vertices_collection);
+
   /**
    * @brief Query vertices with multiple labels within a given collection
    *


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@graphar.apache.org
For additional commands, e-mail: commits-h...@graphar.apache.org

Reply via email to