xiaokang commented on code in PR #33609: URL: https://github.com/apache/doris/pull/33609#discussion_r1565041695
########## be/src/olap/rowset/segment_v2/inverted_index/query/disjunction_query.cpp: ########## @@ -23,58 +23,46 @@ DisjunctionQuery::DisjunctionQuery(const std::shared_ptr<lucene::search::IndexSe const TQueryOptions& query_options) : _searcher(searcher) {} -DisjunctionQuery::~DisjunctionQuery() { - for (auto& term_doc : _term_docs) { - if (term_doc) { - _CLDELETE(term_doc); - } - } - for (auto& term : _terms) { - if (term) { - _CLDELETE(term); - } - } -} - void DisjunctionQuery::add(const std::wstring& field_name, const std::vector<std::string>& terms) { if (terms.empty()) { _CLTHROWA(CL_ERR_IllegalArgument, "DisjunctionQuery::add: terms empty"); } - for (const auto& term : terms) { - std::wstring ws_term = StringUtil::string_to_wstring(term); - Term* t = _CLNEW Term(field_name.c_str(), ws_term.c_str()); - _terms.push_back(t); - TermDocs* term_doc = _searcher->getReader()->termDocs(t); - _term_docs.push_back(term_doc); - _term_iterators.emplace_back(term_doc); - } + _field_name = field_name; + _terms = terms; } void DisjunctionQuery::search(roaring::Roaring& roaring) { - roaring::Roaring result; - auto func = [&roaring](const TermIterator& term_docs, bool first) { - roaring::Roaring result; + auto func = [this, &roaring](const std::string& term, bool first) { + std::wstring ws_term = StringUtil::string_to_wstring(term); + auto* t = _CLNEW Term(_field_name.c_str(), ws_term.c_str()); + auto* term_doc = _searcher->getReader()->termDocs(t); + TermIterator iterator(term_doc); + DocRange doc_range; - while (term_docs.readRange(&doc_range)) { + roaring::Roaring result; + while (iterator.readRange(&doc_range)) { if (doc_range.type_ == DocRangeType::kMany) { result.addMany(doc_range.doc_many_size_, doc_range.doc_many->data()); } else { result.addRange(doc_range.doc_range.first, doc_range.doc_range.second); } } + + _CLDELETE(term_doc); + _CLDELETE(t); + if (first) { roaring.swap(result); } else { roaring |= result; } }; - for (int i = 0; i < _term_iterators.size(); i++) { - auto& iter = _term_iterators[i]; + for (int i = 0; i < _terms.size(); i++) { if (i == 0) { - func(iter, true); + func(_terms[i], true); Review Comment: use one call func(_terms[i], i == 0) instead of if else ########## be/src/olap/rowset/segment_v2/inverted_index/query/disjunction_query.h: ########## @@ -28,17 +28,20 @@ class DisjunctionQuery : public Query { public: DisjunctionQuery(const std::shared_ptr<lucene::search::IndexSearcher>& searcher, const TQueryOptions& query_options); - ~DisjunctionQuery() override; + ~DisjunctionQuery() override = default; void add(const std::wstring& field_name, const std::vector<std::string>& terms) override; void search(roaring::Roaring& roaring) override; private: std::shared_ptr<lucene::search::IndexSearcher> _searcher; - std::vector<Term*> _terms; - std::vector<TermDocs*> _term_docs; - std::vector<TermIterator> _term_iterators; + // std::vector<Term*> _terms; Review Comment: just delete ########## be/src/olap/rowset/segment_v2/inverted_index/query/disjunction_query.h: ########## @@ -28,17 +28,20 @@ class DisjunctionQuery : public Query { public: DisjunctionQuery(const std::shared_ptr<lucene::search::IndexSearcher>& searcher, const TQueryOptions& query_options); - ~DisjunctionQuery() override; + ~DisjunctionQuery() override = default; void add(const std::wstring& field_name, const std::vector<std::string>& terms) override; void search(roaring::Roaring& roaring) override; private: std::shared_ptr<lucene::search::IndexSearcher> _searcher; - std::vector<Term*> _terms; - std::vector<TermDocs*> _term_docs; - std::vector<TermIterator> _term_iterators; + // std::vector<Term*> _terms; + // std::vector<TermDocs*> _term_docs; + // std::vector<TermIterator> _term_iterators; + + std::wstring _field_name; Review Comment: It's better to put common fields: _filed_name and _terms in parent class Query. ########## be/src/olap/rowset/segment_v2/inverted_index/query/regexp_query.cpp: ########## @@ -30,80 +30,43 @@ RegexpQuery::RegexpQuery(const std::shared_ptr<lucene::search::IndexSearcher>& s _max_expansions(query_options.inverted_index_max_expansions), _query(searcher, query_options) {} -void RegexpQuery::add(const std::wstring& field_name, const std::vector<std::string>& patterns) { - if (patterns.size() != 1) { +void RegexpQuery::add(const std::wstring& field_name, const std::vector<std::string>& terms) { + if (terms.size() != 1) { _CLTHROWA(CL_ERR_IllegalArgument, "RegexpQuery::add: terms size != 1"); } - const std::string& pattern = patterns[0]; - - hs_database_t* database = nullptr; - hs_compile_error_t* compile_err = nullptr; - hs_scratch_t* scratch = nullptr; - - if (hs_compile(pattern.data(), HS_FLAG_DOTALL | HS_FLAG_ALLOWEMPTY | HS_FLAG_UTF8, - HS_MODE_BLOCK, nullptr, &database, &compile_err) != HS_SUCCESS) { - LOG(ERROR) << "hyperscan compilation failed: " << compile_err->message; - hs_free_compile_error(compile_err); - return; - } - - if (hs_alloc_scratch(database, &scratch) != HS_SUCCESS) { - LOG(ERROR) << "hyperscan could not allocate scratch space."; - hs_free_database(database); - return; - } - - auto on_match = [](unsigned int id, unsigned long long from, unsigned long long to, - unsigned int flags, void* context) -> int { - *((bool*)context) = true; - return 0; - }; + re2::RE2 pattern(terms[0]); + std::vector<std::string> results; Review Comment: The original name terms is more easy to understand. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org