This is an automated email from the ASF dual-hosted git repository. kxiao pushed a commit to branch clucene-2.0 in repository https://gitbox.apache.org/repos/asf/doris-thirdparty.git
The following commit(s) were added to refs/heads/clucene-2.0 by this push: new d83b162869 [opt](position) add position iterator interface (#229) d83b162869 is described below commit d83b1628694a22c7bc7e12e0d8511363b1b770bd Author: zzzxl <33418555+zzzxl1...@users.noreply.github.com> AuthorDate: Sat Jun 29 23:32:00 2024 +0800 [opt](position) add position iterator interface (#229) --- src/core/CLucene/index/SegmentTermDocs.cpp | 4 +-- src/core/CLucene/index/_SegmentHeader.h | 4 +-- src/core/CLucene/search/query/DcoIdSetIterator.h | 16 ------------ src/core/CLucene/search/query/TermIterator.h | 29 ++++++++++++---------- .../CLucene/search/query/TermPositionIterator.h | 23 +++++++++++++++++ src/core/CMakeLists.txt | 2 +- 6 files changed, 44 insertions(+), 34 deletions(-) diff --git a/src/core/CLucene/index/SegmentTermDocs.cpp b/src/core/CLucene/index/SegmentTermDocs.cpp index 9108f1dfd5..e346dc0ca2 100644 --- a/src/core/CLucene/index/SegmentTermDocs.cpp +++ b/src/core/CLucene/index/SegmentTermDocs.cpp @@ -19,7 +19,7 @@ CL_NS_DEF(index) SegmentTermDocs::SegmentTermDocs(const SegmentReader *_parent) : parent(_parent), freqStream(_parent->freqStream->clone()), - count(0), df(0), deletedDocs(_parent->deletedDocs), _doc(0), _freq(0), skipInterval(_parent->tis->getSkipInterval()), + count(0), df(0), deletedDocs(_parent->deletedDocs), _doc(-1), _freq(0), skipInterval(_parent->tis->getSkipInterval()), maxSkipLevels(_parent->tis->getMaxSkipLevels()), skipListReader(NULL), freqBasePointer(0), proxBasePointer(0), skipPointer(0), haveSkipped(false), pointer(0), pointerMax(0), indexVersion_(_parent->_fieldInfos->getIndexVersion()), hasProx(_parent->_fieldInfos->hasProx()), buffer_(freqStream, hasProx, indexVersion_) { @@ -73,7 +73,7 @@ void SegmentTermDocs::seek(const TermInfo *ti, Term *term) { df = 0; } else {// punt case df = ti->docFreq; - _doc = 0; + _doc = -1; freqBasePointer = ti->freqPointer; proxBasePointer = ti->proxPointer; skipPointer = freqBasePointer + ti->skipOffset; 
diff --git a/src/core/CLucene/index/_SegmentHeader.h b/src/core/CLucene/index/_SegmentHeader.h index bf988a2f27..c1f01e7cec 100644 --- a/src/core/CLucene/index/_SegmentHeader.h +++ b/src/core/CLucene/index/_SegmentHeader.h @@ -93,8 +93,8 @@ protected: int32_t count; int32_t df; CL_NS(util)::BitSet* deletedDocs; - int32_t _doc; - int32_t _freq; + int32_t _doc = -1; + int32_t _freq = 0; int32_t docs[PFOR_BLOCK_SIZE]; // buffered doc numbers int32_t freqs[PFOR_BLOCK_SIZE]; // buffered term freqs int32_t pointer; diff --git a/src/core/CLucene/search/query/DcoIdSetIterator.h b/src/core/CLucene/search/query/DcoIdSetIterator.h deleted file mode 100644 index 88aa431357..0000000000 --- a/src/core/CLucene/search/query/DcoIdSetIterator.h +++ /dev/null @@ -1,16 +0,0 @@ -#pragma once - -#include "CLucene/index/DocRange.h" - -class DocIdSetIterator { -public: - DocIdSetIterator() = default; - virtual ~DocIdSetIterator() = default; - - virtual int32_t docID() = 0; - virtual int32_t nextDoc() = 0; - virtual int32_t advance(int32_t target) = 0; - - virtual int32_t docFreq() const = 0; - virtual bool readRange(DocRange* docRange) const = 0; -}; \ No newline at end of file diff --git a/src/core/CLucene/search/query/TermIterator.h b/src/core/CLucene/search/query/TermIterator.h index e0cf23a4fb..3eb22a254d 100644 --- a/src/core/CLucene/search/query/TermIterator.h +++ b/src/core/CLucene/search/query/TermIterator.h @@ -1,51 +1,54 @@ #pragma once -#include "CLucene/search/query/DcoIdSetIterator.h" #include "CLucene/index/Terms.h" #include <limits.h> +#include <cstdint> CL_NS_USE(index) -class TermIterator : public DocIdSetIterator { +class TermIterator { public: TermIterator() = default; - TermIterator(TermDocs* termDocs) : termDocs_(termDocs) { + TermIterator(TermDocs* termDocs) + : termDocs_(termDocs) { } - virtual ~TermIterator() = default; - - bool isEmpty() { + inline bool isEmpty() const { return termDocs_ == nullptr; } - int32_t docID() override { - uint32_t docId = 
termDocs_->doc(); + inline int32_t docID() const { + int32_t docId = termDocs_->doc(); return docId >= INT_MAX ? INT_MAX : docId; } - int32_t nextDoc() override { + inline int32_t freq() const { + return termDocs_->freq(); + } + + inline int32_t nextDoc() const { if (termDocs_->next()) { return termDocs_->doc(); } return INT_MAX; } - int32_t advance(int32_t target) override { + inline int32_t advance(int32_t target) const { if (termDocs_->skipTo(target)) { return termDocs_->doc(); } return INT_MAX; } - int32_t docFreq() const override { + inline int32_t docFreq() const { return termDocs_->docFreq(); } - bool readRange(DocRange* docRange) const override { + inline bool readRange(DocRange* docRange) const { return termDocs_->readRange(docRange); } -private: +protected: TermDocs* termDocs_ = nullptr; }; \ No newline at end of file diff --git a/src/core/CLucene/search/query/TermPositionIterator.h b/src/core/CLucene/search/query/TermPositionIterator.h new file mode 100644 index 0000000000..d64af4098f --- /dev/null +++ b/src/core/CLucene/search/query/TermPositionIterator.h @@ -0,0 +1,23 @@ +#pragma once + +#include "CLucene/search/query/TermIterator.h" +#include "CLucene/index/Terms.h" + +#include <limits.h> + +CL_NS_USE(index) + +class TermPositionIterator : public TermIterator { +public: + TermPositionIterator() = default; + TermPositionIterator(TermPositions* termPositions) + : TermIterator(termPositions), termPositions_(termPositions) { + } + + inline int32_t nextPosition() const { + return termPositions_->nextPosition(); + } + +private: + TermPositions* termPositions_ = nullptr; +}; \ No newline at end of file diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index e1c13305aa..b9a09bb306 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -199,8 +199,8 @@ SET(clucene_core_Files ./CLucene/search/spans/SpanWeight.cpp ./CLucene/search/spans/SpanWeight.h ./CLucene/search/spans/TermSpans.cpp - ./CLucene/search/query/DcoIdSetIterator.h 
./CLucene/search/query/TermIterator.h + ./CLucene/search/query/TermPositionIterator.h ) #if USE_SHARED_OBJECT_FILES then we link directly to the object files (means rebuilding them for the core) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org For additional commands, e-mail: commits-help@doris.apache.org