yangxk1 commented on code in PR #654: URL: https://github.com/apache/incubator-graphar/pull/654#discussion_r2099522935
########## cpp/src/graphar/high-level/graph_reader.cc: ########## @@ -94,6 +100,290 @@ Vertex::Vertex(IdType id, } } +Result<bool> VertexIter::hasLabel(const std::string& label) noexcept { + std::shared_ptr<arrow::ChunkedArray> column(nullptr); + label_reader_.seek(cur_offset_); + GAR_ASSIGN_OR_RAISE(auto chunk_table, label_reader_.GetLabelChunk()); + column = util::GetArrowColumnByName(chunk_table, label); + if (column != nullptr) { + auto array = util::GetArrowArrayByChunkIndex(column, 0); + auto bool_array = std::dynamic_pointer_cast<arrow::BooleanArray>(array); + return bool_array->Value(0); + } + return Status::KeyError("label with name ", label, + " does not exist in the vertex."); +} + +Result<std::vector<std::string>> VertexIter::label() noexcept { + std::shared_ptr<arrow::ChunkedArray> column(nullptr); + std::vector<std::string> vertex_label; + if (is_filtered_) + label_reader_.seek(filtered_ids_[cur_offset_]); + else + label_reader_.seek(cur_offset_); + GAR_ASSIGN_OR_RAISE(auto chunk_table, label_reader_.GetLabelChunk()); + for (auto label : labels_) { + column = util::GetArrowColumnByName(chunk_table, label); + if (column != nullptr) { + auto array = util::GetArrowArrayByChunkIndex(column, 0); + auto bool_array = std::dynamic_pointer_cast<arrow::BooleanArray>(array); + if (bool_array->Value(0)) { + vertex_label.push_back(label); + } + } + } + return vertex_label; +} + +static inline bool IsValid(bool* state, int column_number) { + for (int i = 0; i < column_number; ++i) { + // AND case + if (!state[i]) + return false; + // OR case + // if (state[i]) return true; + } + // AND case + return true; + // OR case + // return false; +} + +Result<std::vector<IdType>> VerticesCollection::filter( + std::vector<std::string> filter_labels, + std::vector<IdType>* new_valid_chunk) { + std::vector<int> indices; + const int TOT_ROWS_NUM = vertex_num_; + const int CHUNK_SIZE = vertex_info_->GetChunkSize(); + const int TOT_LABEL_NUM = labels_.size(); + const int TESTED_LABEL_NUM = filter_labels.size(); + std::vector<int> tested_label_ids; + + for (const auto& filter_label : filter_labels) { + auto it = std::find(labels_.begin(), labels_.end(), filter_label); + if (it != labels_.end()) { + tested_label_ids.push_back(std::distance(labels_.begin(), it)); + } + } + if (tested_label_ids.empty()) + return Status::KeyError( + "query label" + " does not exist in the vertex."); + + uint64_t* bitmap = new uint64_t[TOT_ROWS_NUM / 64 + 1]; Review Comment: I'm wondering why memory is allocated for the bitmap here — is it for security reasons or another purpose? In the `read_parquet_file_and_get_valid_indices` method, the `query_type` argument is always passed as `QUERY_TYPE::INDEX`. It seems that the bitmap is never actually used. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@graphar.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@graphar.apache.org For additional commands, e-mail: commits-h...@graphar.apache.org