This is an automated email from the ASF dual-hosted git repository.

jianliangqi pushed a commit to branch revert-203-clucene in repository https://gitbox.apache.org/repos/asf/doris-thirdparty.git
commit b8c1ef4de76b3fc3c5c0dbcdd3eaba43a226bb7f
Author: qiye <jianliang5...@gmail.com>
AuthorDate: Tue Mar 19 12:05:51 2024 +0800

    Revert "[opt](chinese) chinese tokenizer lowercase interface (#203)"

    This reverts commit cf210eaaadc3ad5d7b27ff2e7b9635ad45cf227b.
---
 src/core/CLucene/index/IndexWriter.cpp | 18 +++---------------
 src/core/CLucene/index/IndexWriter.h   |  4 ++--
 2 files changed, 5 insertions(+), 17 deletions(-)

diff --git a/src/core/CLucene/index/IndexWriter.cpp b/src/core/CLucene/index/IndexWriter.cpp
index 6b52e047f5..0d770182ba 100644
--- a/src/core/CLucene/index/IndexWriter.cpp
+++ b/src/core/CLucene/index/IndexWriter.cpp
@@ -1255,7 +1255,7 @@ void IndexWriter::resetMergeExceptions() {
 void IndexWriter::indexCompaction(std::vector<lucene::store::Directory *> &src_dirs,
                                   std::vector<lucene::store::Directory *> dest_dirs,
                                   std::vector<std::vector<std::pair<uint32_t, uint32_t>>> trans_vec,
-                                  std::vector<uint32_t> dest_index_docs, bool maybe_skip) {
+                                  std::vector<uint32_t> dest_index_docs) {
     CND_CONDITION(src_dirs.size() > 0, "Source directory not found.");
     CND_CONDITION(dest_dirs.size() > 0, "Destination directory not found.");
     this->_trans_vec = std::move(trans_vec);
@@ -1387,7 +1387,7 @@ void IndexWriter::indexCompaction(std::vector<lucene::store::Directory *> &src_d
     }
 
     /// merge terms
-    mergeTerms(hasProx, maybe_skip);
+    mergeTerms(hasProx);
 
     /// merge null_bitmap
     mergeNullBitmap(srcNullBitmapValues, nullBitmapIndexOutputList);
@@ -1613,7 +1613,7 @@ protected:
 };
 
 
-void IndexWriter::mergeTerms(bool hasProx, bool maybe_skip) {
+void IndexWriter::mergeTerms(bool hasProx) {
     auto queue = _CLNEW SegmentMergeQueue(readers.size());
     auto numSrcIndexes = readers.size();
     //std::vector<TermPositions *> postingsList(numSrcIndexes);
@@ -1664,18 +1664,6 @@ void IndexWriter::mergeTerms(bool hasProx, bool maybe_skip) {
             top = queue->top();
         }
 
-        if (maybe_skip && smallestTerm) {
-            auto containsUpperCase = [](const std::wstring_view& ws_term) {
-                return std::any_of(ws_term.begin(), ws_term.end(),
-                                   [](wchar_t ch) { return std::iswupper(ch) != 0; });
-            };
-
-            std::wstring_view ws_term(smallestTerm->text(), smallestTerm->textLength());
-            if (containsUpperCase(ws_term)) {
-                _CLTHROWA(CL_ERR_InvalidState, "need rewrite, skip index compaction");
-            }
-        }
-
         std::vector<std::vector<uint32_t>> docDeltaBuffers(numDestIndexes);
         std::vector<std::vector<uint32_t>> freqBuffers(numDestIndexes);
         auto destPostingQueues = _CLNEW postingQueue(matchSize);
diff --git a/src/core/CLucene/index/IndexWriter.h b/src/core/CLucene/index/IndexWriter.h
index 0e8d40d8cc..7cfb67d2ca 100644
--- a/src/core/CLucene/index/IndexWriter.h
+++ b/src/core/CLucene/index/IndexWriter.h
@@ -317,14 +317,14 @@ public:
     void indexCompaction(std::vector<lucene::store::Directory*>& src_dirs,
                          std::vector<lucene::store::Directory*> dest_dirs,
                          std::vector<std::vector<std::pair<uint32_t, uint32_t>>> trans_vec,
-                         std::vector<uint32_t> dest_index_docs, bool maybe_skip = false);
+                         std::vector<uint32_t> dest_index_docs);
 
     // create new fields info
     void mergeFields(bool hasProx);
     // write fields info file
     void writeFields(lucene::store::Directory* d, std::string segment);
     // merge terms and write files
-    void mergeTerms(bool hasProx, bool maybe_skip = false);
+    void mergeTerms(bool hasProx);
     // merge null_bitmap
     void mergeNullBitmap(std::vector<std::vector<uint32_t>> srcBitmapValues,
                          std::vector<lucene::store::IndexOutput *> nullBitmapIndexOutputList);

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org