This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch clucene-2.0
in repository https://gitbox.apache.org/repos/asf/doris-thirdparty.git


The following commit(s) were added to refs/heads/clucene-2.0 by this push:
     new d83b162869 [opt](position) add position iterator interface (#229)
d83b162869 is described below

commit d83b1628694a22c7bc7e12e0d8511363b1b770bd
Author: zzzxl <33418555+zzzxl1...@users.noreply.github.com>
AuthorDate: Sat Jun 29 23:32:00 2024 +0800

    [opt](position) add position iterator interface (#229)
---
 src/core/CLucene/index/SegmentTermDocs.cpp         |  4 +--
 src/core/CLucene/index/_SegmentHeader.h            |  4 +--
 src/core/CLucene/search/query/DcoIdSetIterator.h   | 16 ------------
 src/core/CLucene/search/query/TermIterator.h       | 29 ++++++++++++----------
 .../CLucene/search/query/TermPositionIterator.h    | 23 +++++++++++++++++
 src/core/CMakeLists.txt                            |  2 +-
 6 files changed, 44 insertions(+), 34 deletions(-)

diff --git a/src/core/CLucene/index/SegmentTermDocs.cpp b/src/core/CLucene/index/SegmentTermDocs.cpp
index 9108f1dfd5..e346dc0ca2 100644
--- a/src/core/CLucene/index/SegmentTermDocs.cpp
+++ b/src/core/CLucene/index/SegmentTermDocs.cpp
@@ -19,7 +19,7 @@
 CL_NS_DEF(index)
 
 SegmentTermDocs::SegmentTermDocs(const SegmentReader *_parent) : parent(_parent), freqStream(_parent->freqStream->clone()),
-                                                                 count(0), df(0), deletedDocs(_parent->deletedDocs), _doc(0), _freq(0), skipInterval(_parent->tis->getSkipInterval()),
+                                                                 count(0), df(0), deletedDocs(_parent->deletedDocs), _doc(-1), _freq(0), skipInterval(_parent->tis->getSkipInterval()),
                                                                  maxSkipLevels(_parent->tis->getMaxSkipLevels()), skipListReader(NULL), freqBasePointer(0), proxBasePointer(0),
                                                                  skipPointer(0), haveSkipped(false), pointer(0), pointerMax(0), indexVersion_(_parent->_fieldInfos->getIndexVersion()),
                                                                  hasProx(_parent->_fieldInfos->hasProx()), buffer_(freqStream, hasProx, indexVersion_) {
@@ -73,7 +73,7 @@ void SegmentTermDocs::seek(const TermInfo *ti, Term *term) {
         df = 0;
     } else {// punt case
         df = ti->docFreq;
-        _doc = 0;
+        _doc = -1;
         freqBasePointer = ti->freqPointer;
         proxBasePointer = ti->proxPointer;
         skipPointer = freqBasePointer + ti->skipOffset;
diff --git a/src/core/CLucene/index/_SegmentHeader.h b/src/core/CLucene/index/_SegmentHeader.h
index bf988a2f27..c1f01e7cec 100644
--- a/src/core/CLucene/index/_SegmentHeader.h
+++ b/src/core/CLucene/index/_SegmentHeader.h
@@ -93,8 +93,8 @@ protected:
   int32_t count;
   int32_t df;
   CL_NS(util)::BitSet* deletedDocs;
-  int32_t _doc;
-  int32_t _freq;
+  int32_t _doc = -1;
+  int32_t _freq = 0;
   int32_t docs[PFOR_BLOCK_SIZE];         // buffered doc numbers
   int32_t freqs[PFOR_BLOCK_SIZE];        // buffered term freqs
   int32_t pointer;
diff --git a/src/core/CLucene/search/query/DcoIdSetIterator.h b/src/core/CLucene/search/query/DcoIdSetIterator.h
deleted file mode 100644
index 88aa431357..0000000000
--- a/src/core/CLucene/search/query/DcoIdSetIterator.h
+++ /dev/null
@@ -1,16 +0,0 @@
-#pragma once
-
-#include "CLucene/index/DocRange.h"
-
-class DocIdSetIterator {
-public:
-  DocIdSetIterator() = default;
-  virtual ~DocIdSetIterator() = default;
-
-  virtual int32_t docID() = 0;
-  virtual int32_t nextDoc() = 0;
-  virtual int32_t advance(int32_t target) = 0;
-
-  virtual int32_t docFreq() const = 0;
-  virtual bool readRange(DocRange* docRange) const = 0;
-};
\ No newline at end of file
diff --git a/src/core/CLucene/search/query/TermIterator.h b/src/core/CLucene/search/query/TermIterator.h
index e0cf23a4fb..3eb22a254d 100644
--- a/src/core/CLucene/search/query/TermIterator.h
+++ b/src/core/CLucene/search/query/TermIterator.h
@@ -1,51 +1,54 @@
 #pragma once
 
-#include "CLucene/search/query/DcoIdSetIterator.h"
 #include "CLucene/index/Terms.h"
 
 #include <limits.h>
+#include <cstdint>
 
 CL_NS_USE(index)
 
-class TermIterator : public DocIdSetIterator {
+class TermIterator {
 public:
   TermIterator() = default;
-  TermIterator(TermDocs* termDocs) : termDocs_(termDocs) {
+  TermIterator(TermDocs* termDocs) 
+    : termDocs_(termDocs) {
   }
 
-  virtual ~TermIterator() = default;
-
-  bool isEmpty() {
+  inline bool isEmpty() const {
     return termDocs_ == nullptr;
   }
 
-  int32_t docID() override {
-    uint32_t docId = termDocs_->doc();
+  inline int32_t docID() const {
+    int32_t docId = termDocs_->doc();
     return docId >= INT_MAX ? INT_MAX : docId;
   }
 
-  int32_t nextDoc() override {
+  inline int32_t freq() const {
+    return termDocs_->freq();
+  }
+
+  inline int32_t nextDoc() const {
     if (termDocs_->next()) {
       return termDocs_->doc();
     }
     return INT_MAX;
   }
 
-  int32_t advance(int32_t target) override {
+  inline int32_t advance(int32_t target) const {
     if (termDocs_->skipTo(target)) {
       return termDocs_->doc();
     }
     return INT_MAX;
   }
 
-  int32_t docFreq() const override {
+  inline int32_t docFreq() const {
     return termDocs_->docFreq();
   }
 
-  bool readRange(DocRange* docRange) const override {
+  inline bool readRange(DocRange* docRange) const {
     return termDocs_->readRange(docRange);
   }
   
-private:
+protected:
   TermDocs* termDocs_ = nullptr;
 };
\ No newline at end of file
diff --git a/src/core/CLucene/search/query/TermPositionIterator.h b/src/core/CLucene/search/query/TermPositionIterator.h
new file mode 100644
index 0000000000..d64af4098f
--- /dev/null
+++ b/src/core/CLucene/search/query/TermPositionIterator.h
@@ -0,0 +1,23 @@
+#pragma once
+
+#include "CLucene/search/query/TermIterator.h"
+#include "CLucene/index/Terms.h"
+
+#include <limits.h>
+
+CL_NS_USE(index)
+
+class TermPositionIterator : public TermIterator {
+public:
+  TermPositionIterator() = default;
+  TermPositionIterator(TermPositions* termPositions) 
+    : TermIterator(termPositions), termPositions_(termPositions) {
+  }
+
+  inline int32_t nextPosition() const {
+    return termPositions_->nextPosition();
+  }
+
+private:
+  TermPositions* termPositions_ = nullptr;
+};
\ No newline at end of file
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index e1c13305aa..b9a09bb306 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -199,8 +199,8 @@ SET(clucene_core_Files
     ./CLucene/search/spans/SpanWeight.cpp
     ./CLucene/search/spans/SpanWeight.h
     ./CLucene/search/spans/TermSpans.cpp
-    ./CLucene/search/query/DcoIdSetIterator.h
     ./CLucene/search/query/TermIterator.h
+    ./CLucene/search/query/TermPositionIterator.h
     )
 
 #if USE_SHARED_OBJECT_FILES then we link directly to the object files (means rebuilding them for the core)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to