xiaokang commented on code in PR #33609:
URL: https://github.com/apache/doris/pull/33609#discussion_r1565041695


##########
be/src/olap/rowset/segment_v2/inverted_index/query/disjunction_query.cpp:
##########
@@ -23,58 +23,46 @@ DisjunctionQuery::DisjunctionQuery(const 
std::shared_ptr<lucene::search::IndexSe
                                    const TQueryOptions& query_options)
         : _searcher(searcher) {}
 
-DisjunctionQuery::~DisjunctionQuery() {
-    for (auto& term_doc : _term_docs) {
-        if (term_doc) {
-            _CLDELETE(term_doc);
-        }
-    }
-    for (auto& term : _terms) {
-        if (term) {
-            _CLDELETE(term);
-        }
-    }
-}
-
 void DisjunctionQuery::add(const std::wstring& field_name, const 
std::vector<std::string>& terms) {
     if (terms.empty()) {
         _CLTHROWA(CL_ERR_IllegalArgument, "DisjunctionQuery::add: terms 
empty");
     }
 
-    for (const auto& term : terms) {
-        std::wstring ws_term = StringUtil::string_to_wstring(term);
-        Term* t = _CLNEW Term(field_name.c_str(), ws_term.c_str());
-        _terms.push_back(t);
-        TermDocs* term_doc = _searcher->getReader()->termDocs(t);
-        _term_docs.push_back(term_doc);
-        _term_iterators.emplace_back(term_doc);
-    }
+    _field_name = field_name;
+    _terms = terms;
 }
 
 void DisjunctionQuery::search(roaring::Roaring& roaring) {
-    roaring::Roaring result;
-    auto func = [&roaring](const TermIterator& term_docs, bool first) {
-        roaring::Roaring result;
+    auto func = [this, &roaring](const std::string& term, bool first) {
+        std::wstring ws_term = StringUtil::string_to_wstring(term);
+        auto* t = _CLNEW Term(_field_name.c_str(), ws_term.c_str());
+        auto* term_doc = _searcher->getReader()->termDocs(t);
+        TermIterator iterator(term_doc);
+
         DocRange doc_range;
-        while (term_docs.readRange(&doc_range)) {
+        roaring::Roaring result;
+        while (iterator.readRange(&doc_range)) {
             if (doc_range.type_ == DocRangeType::kMany) {
                 result.addMany(doc_range.doc_many_size_, 
doc_range.doc_many->data());
             } else {
                 result.addRange(doc_range.doc_range.first, 
doc_range.doc_range.second);
             }
         }
+
+        _CLDELETE(term_doc);
+        _CLDELETE(t);
+
         if (first) {
             roaring.swap(result);
         } else {
             roaring |= result;
         }
     };
-    for (int i = 0; i < _term_iterators.size(); i++) {
-        auto& iter = _term_iterators[i];
+    for (int i = 0; i < _terms.size(); i++) {
         if (i == 0) {
-            func(iter, true);
+            func(_terms[i], true);

Review Comment:
   use one call func(_terms[i], i == 0) instead of if else



##########
be/src/olap/rowset/segment_v2/inverted_index/query/disjunction_query.h:
##########
@@ -28,17 +28,20 @@ class DisjunctionQuery : public Query {
 public:
     DisjunctionQuery(const std::shared_ptr<lucene::search::IndexSearcher>& 
searcher,
                      const TQueryOptions& query_options);
-    ~DisjunctionQuery() override;
+    ~DisjunctionQuery() override = default;
 
     void add(const std::wstring& field_name, const std::vector<std::string>& 
terms) override;
     void search(roaring::Roaring& roaring) override;
 
 private:
     std::shared_ptr<lucene::search::IndexSearcher> _searcher;
 
-    std::vector<Term*> _terms;
-    std::vector<TermDocs*> _term_docs;
-    std::vector<TermIterator> _term_iterators;
+    // std::vector<Term*> _terms;

Review Comment:
   just delete



##########
be/src/olap/rowset/segment_v2/inverted_index/query/disjunction_query.h:
##########
@@ -28,17 +28,20 @@ class DisjunctionQuery : public Query {
 public:
     DisjunctionQuery(const std::shared_ptr<lucene::search::IndexSearcher>& 
searcher,
                      const TQueryOptions& query_options);
-    ~DisjunctionQuery() override;
+    ~DisjunctionQuery() override = default;
 
     void add(const std::wstring& field_name, const std::vector<std::string>& 
terms) override;
     void search(roaring::Roaring& roaring) override;
 
 private:
     std::shared_ptr<lucene::search::IndexSearcher> _searcher;
 
-    std::vector<Term*> _terms;
-    std::vector<TermDocs*> _term_docs;
-    std::vector<TermIterator> _term_iterators;
+    // std::vector<Term*> _terms;
+    // std::vector<TermDocs*> _term_docs;
+    // std::vector<TermIterator> _term_iterators;
+
+    std::wstring _field_name;

Review Comment:
   It's better to put common fields: _filed_name and _terms in parent class 
Query.



##########
be/src/olap/rowset/segment_v2/inverted_index/query/regexp_query.cpp:
##########
@@ -30,80 +30,43 @@ RegexpQuery::RegexpQuery(const 
std::shared_ptr<lucene::search::IndexSearcher>& s
           _max_expansions(query_options.inverted_index_max_expansions),
           _query(searcher, query_options) {}
 
-void RegexpQuery::add(const std::wstring& field_name, const 
std::vector<std::string>& patterns) {
-    if (patterns.size() != 1) {
+void RegexpQuery::add(const std::wstring& field_name, const 
std::vector<std::string>& terms) {
+    if (terms.size() != 1) {
         _CLTHROWA(CL_ERR_IllegalArgument, "RegexpQuery::add: terms size != 1");
     }
 
-    const std::string& pattern = patterns[0];
-
-    hs_database_t* database = nullptr;
-    hs_compile_error_t* compile_err = nullptr;
-    hs_scratch_t* scratch = nullptr;
-
-    if (hs_compile(pattern.data(), HS_FLAG_DOTALL | HS_FLAG_ALLOWEMPTY | 
HS_FLAG_UTF8,
-                   HS_MODE_BLOCK, nullptr, &database, &compile_err) != 
HS_SUCCESS) {
-        LOG(ERROR) << "hyperscan compilation failed: " << compile_err->message;
-        hs_free_compile_error(compile_err);
-        return;
-    }
-
-    if (hs_alloc_scratch(database, &scratch) != HS_SUCCESS) {
-        LOG(ERROR) << "hyperscan could not allocate scratch space.";
-        hs_free_database(database);
-        return;
-    }
-
-    auto on_match = [](unsigned int id, unsigned long long from, unsigned long 
long to,
-                       unsigned int flags, void* context) -> int {
-        *((bool*)context) = true;
-        return 0;
-    };
+    re2::RE2 pattern(terms[0]);
+    std::vector<std::string> results;

Review Comment:
   The original name terms is more easy to understand.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to